diff --git a/compiler/rustc_macros/src/query.rs b/compiler/rustc_macros/src/query.rs index 721cc7fe4d9b3..0a741d32ed61d 100644 --- a/compiler/rustc_macros/src/query.rs +++ b/compiler/rustc_macros/src/query.rs @@ -137,54 +137,23 @@ struct CacheOnDiskIf { block: Block, } +/// See `rustc_middle::query::modifiers` for documentation of each query modifier. struct QueryModifiers { - /// The description of the query. - desc: Desc, - - /// Use this type for the in-memory cache. + // tidy-alphabetical-start + anon: Option, arena_cache: Option, - - /// Cache the query to disk if the `Block` returns true. cache_on_disk_if: Option, - - /// A cycle error for this query aborting the compilation with a fatal error. - cycle_fatal: Option, - - /// A cycle error results in a delay_bug call cycle_delay_bug: Option, - - /// A cycle error results in a stashed cycle error that can be unstashed and canceled later + cycle_fatal: Option, cycle_stash: Option, - - /// Don't hash the result, instead just mark a query red if it runs - no_hash: Option, - - /// Generate a dep node based on the dependencies of the query - anon: Option, - - /// Always evaluate the query, ignoring its dependencies - eval_always: Option, - - /// Whether the query has a call depth limit depth_limit: Option, - - /// Use a separate query provider for local and extern crates - separate_provide_extern: Option, - - /// Generate a `feed` method to set the query's value from another query. + desc: Desc, + eval_always: Option, feedable: Option, - - /// When this query is called via `tcx.ensure_ok()`, it returns - /// `Result<(), ErrorGuaranteed>` instead of `()`. If the query needs to - /// be executed, and that execution returns an error, the error result is - /// returned to the caller. - /// - /// If execution is skipped, a synthetic `Ok(())` is returned, on the - /// assumption that a query with all-green inputs must have succeeded. - /// - /// Can only be applied to queries with a return value of - /// `Result<_, ErrorGuaranteed>`. + no_hash: Option, return_result_from_ensure_ok: Option, + separate_provide_extern: Option, + // tidy-alphabetical-end } fn parse_query_modifiers(input: ParseStream<'_>) -> Result { @@ -272,6 +241,68 @@ fn parse_query_modifiers(input: ParseStream<'_>) -> Result { }) } +fn make_modifiers_stream(query: &Query, modifiers: &QueryModifiers) -> proc_macro2::TokenStream { + let QueryModifiers { + // tidy-alphabetical-start + anon, + arena_cache, + cache_on_disk_if, + cycle_delay_bug, + cycle_fatal, + cycle_stash, + depth_limit, + desc: _, + eval_always, + feedable, + no_hash, + return_result_from_ensure_ok, + separate_provide_extern, + // tidy-alphabetical-end + } = modifiers; + + let anon = anon.is_some(); + let arena_cache = arena_cache.is_some(); + let cache_on_disk = cache_on_disk_if.is_some(); + + let cycle_error_handling = if cycle_delay_bug.is_some() { + quote! { DelayBug } + } else if cycle_fatal.is_some() { + quote! { Fatal } + } else if cycle_stash.is_some() { + quote! { Stash } + } else { + quote! { Error } + }; + + let depth_limit = depth_limit.is_some(); + let eval_always = eval_always.is_some(); + let feedable = feedable.is_some(); + let no_hash = no_hash.is_some(); + let return_result_from_ensure_ok = return_result_from_ensure_ok.is_some(); + let separate_provide_extern = separate_provide_extern.is_some(); + + // Giving an input span to the modifier names in the modifier list seems + // to give slightly more helpful errors when one of the callback macros + // fails to parse the modifier list. + let query_name_span = query.name.span(); + quote_spanned! { + query_name_span => + // Search for (QMODLIST) to find all occurrences of this query modifier list. + // tidy-alphabetical-start + anon: #anon, + arena_cache: #arena_cache, + cache_on_disk: #cache_on_disk, + cycle_error_handling: #cycle_error_handling, + depth_limit: #depth_limit, + eval_always: #eval_always, + feedable: #feedable, + no_hash: #no_hash, + return_result_from_ensure_ok: #return_result_from_ensure_ok, + separate_provide_extern: #separate_provide_extern, + // tidy-alphabetical-end + } +} + fn doc_comment_from_desc(list: &Punctuated) -> Result { use ::syn::*; let mut iter = list.iter(); @@ -458,51 +489,13 @@ pub(super) fn rustc_queries(input: TokenStream) -> TokenStream { ReturnType::Type(..) => quote! { #return_ty }, }; - let mut modifiers_out = vec![]; - - macro_rules! passthrough { - ( $( $modifier:ident ),+ $(,)? ) => { - $( if let Some($modifier) = &modifiers.$modifier { - modifiers_out.push(quote! { (#$modifier) }); - }; )+ - } - } - - passthrough!( - arena_cache, - cycle_fatal, - cycle_delay_bug, - cycle_stash, - no_hash, - anon, - eval_always, - feedable, - depth_limit, - separate_provide_extern, - return_result_from_ensure_ok, - ); - - // If there was a `cache_on_disk_if` modifier in the real input, pass - // on a synthetic `(cache_on_disk)` modifier that can be inspected by - // macro-rules macros. - if modifiers.cache_on_disk_if.is_some() { - modifiers_out.push(quote! { (cache_on_disk) }); - } - - // This uses the span of the query definition for the commas, - // which can be important if we later encounter any ambiguity - // errors with any of the numerous macro_rules! macros that - // we use. Using the call-site span would result in a span pointing - // at the entire `rustc_queries!` invocation, which wouldn't - // be very useful. - let span = name.span(); - let modifiers_stream = quote_spanned! { span => #(#modifiers_out),* }; + let modifiers_stream = make_modifiers_stream(&query, modifiers); // Add the query to the group query_stream.extend(quote! { #(#doc_comments)* - [#modifiers_stream] - fn #name(#key_ty) #return_ty, + fn #name(#key_ty) #return_ty + { #modifiers_stream } }); if let Some(feedable) = &modifiers.feedable { diff --git a/compiler/rustc_middle/src/dep_graph/dep_node.rs b/compiler/rustc_middle/src/dep_graph/dep_node.rs index 7efa013c3d999..6f85dba23dd49 100644 --- a/compiler/rustc_middle/src/dep_graph/dep_node.rs +++ b/compiler/rustc_middle/src/dep_graph/dep_node.rs @@ -268,8 +268,10 @@ macro_rules! define_dep_nodes { queries { $( $(#[$q_attr:meta])* - [$($modifiers:tt)*] - fn $q_name:ident($K:ty) -> $V:ty, + fn $q_name:ident($K:ty) -> $V:ty + // Search for (QMODLIST) to find all occurrences of this query modifier list. + // Query modifiers are currently not used here, so skip the whole list. + { $($modifiers:tt)* } )* } non_queries { diff --git a/compiler/rustc_middle/src/query/plumbing.rs b/compiler/rustc_middle/src/query/plumbing.rs index ff6d87298028f..815c8a1baab61 100644 --- a/compiler/rustc_middle/src/query/plumbing.rs +++ b/compiler/rustc_middle/src/query/plumbing.rs @@ -333,44 +333,6 @@ macro_rules! query_helper_param_ty { ($K:ty) => { $K }; } -// Expands to `$yes` if the `arena_cache` modifier is present, `$no` otherwise. -macro_rules! if_arena_cache { - ([] $then:tt $no:tt) => { $no }; - ([(arena_cache) $($modifiers:tt)*] $yes:tt $no:tt) => { $yes }; - ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => { - if_arena_cache!([$($modifiers)*] $yes $no) - }; -} - -// Expands to `$yes` if the `separate_provide_extern` modifier is present, `$no` otherwise. -macro_rules! if_separate_provide_extern { - ([] $then:tt $no:tt) => { $no }; - ([(separate_provide_extern) $($modifiers:tt)*] $yes:tt $no:tt) => { $yes }; - ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => { - if_separate_provide_extern!([$($modifiers)*] $yes $no) - }; -} - -// Expands to `$yes` if the `return_result_from_ensure_ok` modifier is present, `$no` otherwise. -macro_rules! if_return_result_from_ensure_ok { - ([] $then:tt $no:tt) => { $no }; - ([(return_result_from_ensure_ok) $($modifiers:tt)*] $yes:tt $no:tt) => { $yes }; - ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => { - if_return_result_from_ensure_ok!([$($modifiers)*] $yes $no) - }; -} - -// Expands to `$item` if the `feedable` modifier is present. -macro_rules! item_if_feedable { - ([] $($item:tt)*) => {}; - ([(feedable) $($rest:tt)*] $($item:tt)*) => { - $($item)* - }; - ([$other:tt $($modifiers:tt)*] $($item:tt)*) => { - item_if_feedable! { [$($modifiers)*] $($item)* } - }; -} - macro_rules! define_callbacks { ( // You might expect the key to be `$K:ty`, but it needs to be `$($K:tt)*` so that @@ -378,8 +340,20 @@ macro_rules! define_callbacks { queries { $( $(#[$attr:meta])* - [$($modifiers:tt)*] - fn $name:ident($($K:tt)*) -> $V:ty, + fn $name:ident($($K:tt)*) -> $V:ty + { + // Search for (QMODLIST) to find all occurrences of this query modifier list. + anon: $anon:literal, + arena_cache: $arena_cache:literal, + cache_on_disk: $cache_on_disk:literal, + cycle_error_handling: $cycle_error_handling:ident, + depth_limit: $depth_limit:literal, + eval_always: $eval_always:literal, + feedable: $feedable:literal, + no_hash: $no_hash:literal, + return_result_from_ensure_ok: $return_result_from_ensure_ok:literal, + separate_provide_extern: $separate_provide_extern:literal, + } )* } // Non-queries are unused here. @@ -394,20 +368,31 @@ macro_rules! define_callbacks { pub type Key<'tcx> = $($K)*; pub type Value<'tcx> = $V; - pub type LocalKey<'tcx> = if_separate_provide_extern!( - [$($modifiers)*] - ( as $crate::query::AsLocalQueryKey>::LocalQueryKey) - (Key<'tcx>) - ); - - /// This type alias specifies the type returned from query providers and the type - /// used for decoding. For regular queries this is the declared returned type `V`, - /// but `arena_cache` will use `::Provided` instead. - pub type ProvidedValue<'tcx> = if_arena_cache!( - [$($modifiers)*] - ( as $crate::query::arena_cached::ArenaCached<'tcx>>::Provided) - (Value<'tcx>) - ); + /// Key type used by provider functions in `local_providers`. + /// This query has the `separate_provide_extern` modifier. + #[cfg($separate_provide_extern)] + pub type LocalKey<'tcx> = + as $crate::query::AsLocalQueryKey>::LocalQueryKey; + /// Key type used by provider functions in `local_providers`. + #[cfg(not($separate_provide_extern))] + pub type LocalKey<'tcx> = Key<'tcx>; + + /// Return type of the `.ensure_ok()` method for this query, + /// which has the `return_result_from_ensure_ok` modifier. + #[cfg($return_result_from_ensure_ok)] + pub type EnsureOkReturnType = Result<(), rustc_errors::ErrorGuaranteed>; + /// Return type of the `.ensure_ok()` method for this query, + /// which does _not_ have the `return_result_from_ensure_ok` modifier. + #[cfg(not($return_result_from_ensure_ok))] + pub type EnsureOkReturnType = (); + + /// Type returned from query providers and loaded from disk-cache. + #[cfg($arena_cache)] + pub type ProvidedValue<'tcx> = + as $crate::query::arena_cached::ArenaCached<'tcx>>::Provided; + /// Type returned from query providers and loaded from disk-cache. + #[cfg(not($arena_cache))] + pub type ProvidedValue<'tcx> = Value<'tcx>; /// This helper function takes a value returned by the query provider /// (or loaded from disk, or supplied by query feeding), allocates @@ -420,23 +405,23 @@ macro_rules! define_callbacks { ) -> Erased> { // For queries with the `arena_cache` modifier, store the // provided value in an arena and get a reference to it. - let value: Value<'tcx> = if_arena_cache!( - [$($modifiers)*] - { - as $crate::query::arena_cached::ArenaCached>:: - alloc_in_arena - ( - tcx, - &tcx.query_system.arenas.$name, - provided_value, - ) - } - { - // Otherwise, the provided value is the value (and `tcx` is unused). - let _ = tcx; - provided_value - } - ); + #[cfg($arena_cache)] + let value: Value<'tcx> = { + use $crate::query::arena_cached::ArenaCached; + as ArenaCached>::alloc_in_arena( + tcx, + &tcx.query_system.arenas.$name, + provided_value, + ) + }; + + // Otherwise, the provided value is the value (and `tcx` is unused). + #[cfg(not($arena_cache))] + let value: Value<'tcx> = { + let _ = tcx; + provided_value + }; + erase::erase_val(value) } @@ -480,13 +465,11 @@ macro_rules! define_callbacks { #[derive(Default)] pub struct QueryArenas<'tcx> { $( - pub $name: if_arena_cache!( - [$($modifiers)*] - // Use the `ArenaCached` helper trait to determine the arena's value type. - (TypedArena<<$V as $crate::query::arena_cached::ArenaCached<'tcx>>::Allocated>) - // No arena for this query, so the field type is `()`. - () - ), + // Use the `ArenaCached` helper trait to determine the arena's value type. + #[cfg($arena_cache)] + pub $name: TypedArena< + <$V as $crate::query::arena_cached::ArenaCached<'tcx>>::Allocated, + >, )* } @@ -497,16 +480,14 @@ macro_rules! define_callbacks { pub fn $name( self, key: query_helper_param_ty!($($K)*), - ) -> if_return_result_from_ensure_ok!( - [$($modifiers)*] - (Result<(), ErrorGuaranteed>) - () - ) { - if_return_result_from_ensure_ok!( - [$($modifiers)*] - (crate::query::inner::query_ensure_error_guaranteed) - (crate::query::inner::query_ensure) - )( + ) -> $crate::queries::$name::EnsureOkReturnType { + + #[cfg($return_result_from_ensure_ok)] + let ensure_fn = crate::query::inner::query_ensure_error_guaranteed; + #[cfg(not($return_result_from_ensure_ok))] + let ensure_fn = crate::query::inner::query_ensure; + + ensure_fn( self.tcx, &self.tcx.query_system.query_vtables.$name, $crate::query::IntoQueryParam::into_query_param(key), @@ -560,24 +541,22 @@ macro_rules! define_callbacks { } $( - item_if_feedable! { - [$($modifiers)*] - impl<'tcx, K: $crate::query::IntoQueryParam<$name::Key<'tcx>> + Copy> - TyCtxtFeed<'tcx, K> - { - $(#[$attr])* - #[inline(always)] - pub fn $name(self, value: $name::ProvidedValue<'tcx>) { - let key = self.key().into_query_param(); - let erased_value = $name::provided_to_erased(self.tcx, value); - $crate::query::inner::query_feed( - self.tcx, - dep_graph::DepKind::$name, - &self.tcx.query_system.query_vtables.$name, - key, - erased_value, - ); - } + #[cfg($feedable)] + impl<'tcx, K: $crate::query::IntoQueryParam<$name::Key<'tcx>> + Copy> + TyCtxtFeed<'tcx, K> + { + $(#[$attr])* + #[inline(always)] + pub fn $name(self, value: $name::ProvidedValue<'tcx>) { + let key = self.key().into_query_param(); + let erased_value = $name::provided_to_erased(self.tcx, value); + $crate::query::inner::query_feed( + self.tcx, + dep_graph::DepKind::$name, + &self.tcx.query_system.query_vtables.$name, + key, + erased_value, + ); } } )* @@ -602,11 +581,11 @@ macro_rules! define_callbacks { pub struct ExternProviders { $( - pub $name: if_separate_provide_extern!( - [$($modifiers)*] - (for<'tcx> fn(TyCtxt<'tcx>, $name::Key<'tcx>) -> $name::ProvidedValue<'tcx>) - () - ), + #[cfg($separate_provide_extern)] + pub $name: for<'tcx> fn( + TyCtxt<'tcx>, + $name::Key<'tcx>, + ) -> $name::ProvidedValue<'tcx>, )* } @@ -626,13 +605,10 @@ macro_rules! define_callbacks { fn default() -> Self { ExternProviders { $( - $name: if_separate_provide_extern!( - [$($modifiers)*] - (|_, key| $crate::query::plumbing::default_extern_query( - stringify!($name), - &key - )) - () + #[cfg($separate_provide_extern)] + $name: |_, key| $crate::query::plumbing::default_extern_query( + stringify!($name), + &key, ), )* } diff --git a/compiler/rustc_query_impl/src/dep_kind_vtables.rs b/compiler/rustc_query_impl/src/dep_kind_vtables.rs index ceddef5385a58..0f3ef34d936a4 100644 --- a/compiler/rustc_query_impl/src/dep_kind_vtables.rs +++ b/compiler/rustc_query_impl/src/dep_kind_vtables.rs @@ -130,8 +130,20 @@ macro_rules! define_dep_kind_vtables { queries { $( $(#[$attr:meta])* - [$($modifiers:tt)*] - fn $name:ident($K:ty) -> $V:ty, + fn $name:ident($K:ty) -> $V:ty + { + // Search for (QMODLIST) to find all occurrences of this query modifier list. + anon: $anon:literal, + arena_cache: $arena_cache:literal, + cache_on_disk: $cache_on_disk:literal, + cycle_error_handling: $cycle_error_handling:ident, + depth_limit: $depth_limit:literal, + eval_always: $eval_always:literal, + feedable: $feedable:literal, + no_hash: $no_hash:literal, + return_result_from_ensure_ok: $return_result_from_ensure_ok:literal, + separate_provide_extern: $separate_provide_extern:literal, + } )* } non_queries { @@ -154,9 +166,9 @@ macro_rules! define_dep_kind_vtables { $crate::dep_kind_vtables::make_dep_kind_vtable_for_query::< $crate::query_impl::$name::VTableGetter, >( - is_anon!([$($modifiers)*]), - if_cache_on_disk!([$($modifiers)*] true false), - is_eval_always!([$($modifiers)*]), + $anon, + $cache_on_disk, + $eval_always, ) ),* ]; diff --git a/compiler/rustc_query_impl/src/plumbing.rs b/compiler/rustc_query_impl/src/plumbing.rs index 17e6fba8ac9a5..425ca28910073 100644 --- a/compiler/rustc_query_impl/src/plumbing.rs +++ b/compiler/rustc_query_impl/src/plumbing.rs @@ -99,125 +99,6 @@ pub(super) fn try_mark_green<'tcx>(tcx: TyCtxt<'tcx>, dep_node: &DepNode) -> boo tcx.dep_graph.try_mark_green(tcx, dep_node).is_some() } -macro_rules! cycle_error_handling { - ([]) => {{ - rustc_middle::query::CycleErrorHandling::Error - }}; - ([(cycle_fatal) $($rest:tt)*]) => {{ - rustc_middle::query::CycleErrorHandling::Fatal - }}; - ([(cycle_stash) $($rest:tt)*]) => {{ - rustc_middle::query::CycleErrorHandling::Stash - }}; - ([(cycle_delay_bug) $($rest:tt)*]) => {{ - rustc_middle::query::CycleErrorHandling::DelayBug - }}; - ([$other:tt $($modifiers:tt)*]) => { - cycle_error_handling!([$($modifiers)*]) - }; -} - -macro_rules! is_anon { - ([]) => {{ - false - }}; - ([(anon) $($rest:tt)*]) => {{ - true - }}; - ([$other:tt $($modifiers:tt)*]) => { - is_anon!([$($modifiers)*]) - }; -} - -macro_rules! is_eval_always { - ([]) => {{ - false - }}; - ([(eval_always) $($rest:tt)*]) => {{ - true - }}; - ([$other:tt $($modifiers:tt)*]) => { - is_eval_always!([$($modifiers)*]) - }; -} - -macro_rules! is_depth_limit { - ([]) => {{ - false - }}; - ([(depth_limit) $($rest:tt)*]) => {{ - true - }}; - ([$other:tt $($modifiers:tt)*]) => { - is_depth_limit!([$($modifiers)*]) - }; -} - -macro_rules! is_feedable { - ([]) => {{ - false - }}; - ([(feedable) $($rest:tt)*]) => {{ - true - }}; - ([$other:tt $($modifiers:tt)*]) => { - is_feedable!([$($modifiers)*]) - }; -} - -/// Expands to `$yes` if the `no_hash` modifier is present, or `$no` otherwise. -macro_rules! if_no_hash { - ([] $yes:tt $no:tt) => { $no }; - ([(no_hash) $($modifiers:tt)*] $yes:tt $no:tt) => { $yes }; - ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => { - if_no_hash!([$($modifiers)*] $yes $no) - } -} - -macro_rules! call_provider { - ([][$tcx:expr, $name:ident, $key:expr]) => {{ - ($tcx.query_system.local_providers.$name)($tcx, $key) - }}; - ([(separate_provide_extern) $($rest:tt)*][$tcx:expr, $name:ident, $key:expr]) => {{ - if let Some(key) = $key.as_local_key() { - ($tcx.query_system.local_providers.$name)($tcx, key) - } else { - ($tcx.query_system.extern_providers.$name)($tcx, $key) - } - }}; - ([$other:tt $($modifiers:tt)*][$($args:tt)*]) => { - call_provider!([$($modifiers)*][$($args)*]) - }; -} - -/// Expands to one of two token trees, depending on whether the current query -/// has the `cache_on_disk_if` modifier. -macro_rules! if_cache_on_disk { - ([] $yes:tt $no:tt) => { - $no - }; - // The `cache_on_disk_if` modifier generates a synthetic `(cache_on_disk)`, - // modifier, for use by this macro and similar macros. - ([(cache_on_disk) $($rest:tt)*] $yes:tt $no:tt) => { - $yes - }; - ([$other:tt $($modifiers:tt)*] $yes:tt $no:tt) => { - if_cache_on_disk!([$($modifiers)*] $yes $no) - }; -} - -/// Conditionally expands to some token trees, if the current query has the -/// `cache_on_disk_if` modifier. -macro_rules! item_if_cache_on_disk { - ([] $($item:tt)*) => {}; - ([(cache_on_disk) $($rest:tt)*] $($item:tt)*) => { - $($item)* - }; - ([$other:tt $($modifiers:tt)*] $($item:tt)*) => { - item_if_cache_on_disk! { [$($modifiers)*] $($item)* } - }; -} - /// The deferred part of a deferred query stack frame. fn mk_query_stack_frame_extra<'tcx, Cache>( (tcx, vtable, key): (TyCtxt<'tcx>, &'tcx QueryVTable<'tcx, Cache>, Cache::Key), @@ -421,8 +302,20 @@ macro_rules! define_queries { queries { $( $(#[$attr:meta])* - [$($modifiers:tt)*] - fn $name:ident($K:ty) -> $V:ty, + fn $name:ident($K:ty) -> $V:ty + { + // Search for (QMODLIST) to find all occurrences of this query modifier list. + anon: $anon:literal, + arena_cache: $arena_cache:literal, + cache_on_disk: $cache_on_disk:literal, + cycle_error_handling: $cycle_error_handling:ident, + depth_limit: $depth_limit:literal, + eval_always: $eval_always:literal, + feedable: $feedable:literal, + no_hash: $no_hash:literal, + return_result_from_ensure_ok: $return_result_from_ensure_ok:literal, + separate_provide_extern: $separate_provide_extern:literal, + } )* } // Non-queries are unused here. @@ -498,7 +391,16 @@ macro_rules! define_queries { let _guard = tracing::span!(tracing::Level::TRACE, stringify!($name), ?key).entered(); // Call the actual provider function for this query. - let provided_value = call_provider!([$($modifiers)*][tcx, $name, key]); + + #[cfg($separate_provide_extern)] + let provided_value = if let Some(local_key) = key.as_local_key() { + (tcx.query_system.local_providers.$name)(tcx, local_key) + } else { + (tcx.query_system.extern_providers.$name)(tcx, key) + }; + + #[cfg(not($separate_provide_extern))] + let provided_value = (tcx.query_system.local_providers.$name)(tcx, key); rustc_middle::ty::print::with_reduced_queries!({ tracing::trace!(?provided_value); @@ -515,64 +417,67 @@ macro_rules! define_queries { { QueryVTable { name: stringify!($name), - anon: is_anon!([$($modifiers)*]), - eval_always: is_eval_always!([$($modifiers)*]), - depth_limit: is_depth_limit!([$($modifiers)*]), - feedable: is_feedable!([$($modifiers)*]), + anon: $anon, + eval_always: $eval_always, + depth_limit: $depth_limit, + feedable: $feedable, dep_kind: dep_graph::DepKind::$name, - cycle_error_handling: cycle_error_handling!([$($modifiers)*]), + cycle_error_handling: + rustc_middle::query::CycleErrorHandling::$cycle_error_handling, state: Default::default(), cache: Default::default(), - will_cache_on_disk_for_key_fn: if_cache_on_disk!([$($modifiers)*] { - Some(::rustc_middle::queries::_cache_on_disk_if_fns::$name) - } { - None - }), + + #[cfg($cache_on_disk)] + will_cache_on_disk_for_key_fn: + Some(rustc_middle::queries::_cache_on_disk_if_fns::$name), + #[cfg(not($cache_on_disk))] + will_cache_on_disk_for_key_fn: None, + call_query_method_fn: |tcx, key| { // Call the query method for its side-effect of loading a value // from disk-cache; the caller doesn't need the value. let _ = tcx.$name(key); }, invoke_provider_fn: self::invoke_provider_fn::__rust_begin_short_backtrace, - try_load_from_disk_fn: if_cache_on_disk!([$($modifiers)*] { - Some(|tcx, key, prev_index, index| { - // Check the `cache_on_disk_if` condition for this key. - if !::rustc_middle::queries::_cache_on_disk_if_fns::$name(tcx, key) { - return None; - } - - let value: queries::$name::ProvidedValue<'tcx> = - $crate::plumbing::try_load_from_disk(tcx, prev_index, index)?; - - // Arena-alloc the value if appropriate, and erase it. - Some(queries::$name::provided_to_erased(tcx, value)) - }) - } { - None + + #[cfg($cache_on_disk)] + try_load_from_disk_fn: Some(|tcx, key, prev_index, index| { + // Check the `cache_on_disk_if` condition for this key. + if !rustc_middle::queries::_cache_on_disk_if_fns::$name(tcx, key) { + return None; + } + + let value: queries::$name::ProvidedValue<'tcx> = + $crate::plumbing::try_load_from_disk(tcx, prev_index, index)?; + + // Arena-alloc the value if appropriate, and erase it. + Some(queries::$name::provided_to_erased(tcx, value)) }), - is_loadable_from_disk_fn: if_cache_on_disk!([$($modifiers)*] { - Some(|tcx, key, index| -> bool { - ::rustc_middle::queries::_cache_on_disk_if_fns::$name(tcx, key) && - $crate::plumbing::loadable_from_disk(tcx, index) - }) - } { - None + #[cfg(not($cache_on_disk))] + try_load_from_disk_fn: None, + + #[cfg($cache_on_disk)] + is_loadable_from_disk_fn: Some(|tcx, key, index| -> bool { + rustc_middle::queries::_cache_on_disk_if_fns::$name(tcx, key) && + $crate::plumbing::loadable_from_disk(tcx, index) }), + #[cfg(not($cache_on_disk))] + is_loadable_from_disk_fn: None, + value_from_cycle_error: |tcx, cycle, guar| { let result: queries::$name::Value<'tcx> = FromCycleError::from_cycle_error(tcx, cycle, guar); erase::erase_val(result) }, - hash_value_fn: if_no_hash!( - [$($modifiers)*] - None - { - Some(|hcx, erased_value: &erase::Erased>| { - let value = erase::restore_val(*erased_value); - rustc_middle::dep_graph::hash_result(hcx, &value) - }) - } - ), + + #[cfg($no_hash)] + hash_value_fn: None, + #[cfg(not($no_hash))] + hash_value_fn: Some(|hcx, erased_value: &erase::Erased>| { + let value = erase::restore_val(*erased_value); + rustc_middle::dep_graph::hash_result(hcx, &value) + }), + format_value: |value| format!("{:?}", erase::restore_val::>(*value)), description_fn: $crate::queries::_description_fns::$name, execute_query_fn: if incremental { @@ -670,8 +575,8 @@ macro_rules! define_queries { query_result_index: &mut EncodedDepNodeIndex, ) { $( - item_if_cache_on_disk! { - [$($modifiers)*] + #[cfg($cache_on_disk)] + { $crate::plumbing::encode_query_results( tcx, &tcx.query_system.query_vtables.$name, diff --git a/library/stdarch/Cargo.toml b/library/stdarch/Cargo.toml index 5979096439118..e3963a69879a1 100644 --- a/library/stdarch/Cargo.toml +++ b/library/stdarch/Cargo.toml @@ -1,5 +1,5 @@ [workspace] -resolver = "1" +resolver = "3" members = [ "crates/*", "examples", diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 2743896375cf3..a357449d51e3d 100644 --- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -12,7 +12,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ build-essential \ lld -RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-9.58.0-2025-06-16-lin.tar.xz -O sde.tar.xz +RUN wget http://ci-mirrors.rust-lang.org/stdarch/sde-external-10.5.0-2026-01-13-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde RUN tar -xJf sde.tar.xz --strip-components=1 -C intel-sde ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def index 342f7d83a63e3..acf023ed0dc49 100644 --- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def +++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/cpuid.def @@ -12,7 +12,7 @@ # CPUID_VERSION = 1.0 # Input => Output # EAX ECX => EAX EBX ECX EDX -00000000 ******** => 00000024 756e6547 6c65746e 49656e69 +00000000 ******** => 00000029 756e6547 6c65746e 49656e69 00000001 ******** => 00400f10 00100800 7ffaf3ff bfebfbff 00000002 ******** => 76035a01 00f0b6ff 00000000 00c10000 00000003 ******** => 00000000 00000000 00000000 00000000 @@ -48,6 +48,7 @@ 0000001e 00000001 => 000001ff 00000000 00000000 00000000 00000024 00000000 => 00000001 00070002 00000000 00000000 #AVX10 00000024 00000001 => 00000000 00000000 00000004 00000000 +00000029 ******** => 00000000 00000001 00000000 00000000 80000000 ******** => 80000008 00000000 00000000 00000000 80000001 ******** => 00000000 00000000 00000121 2c100000 80000002 ******** => 00000000 00000000 00000000 00000000 diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs index 88afaae8b80d3..de64839661d6e 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/generated.rs @@ -14131,26 +14131,7 @@ pub fn vmlaq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlal_high_s16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s16)"] @@ -14165,26 +14146,7 @@ pub fn vmlal_high_laneq_s16( c: int16x8_t, ) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlal_high_s16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmlal_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_s32)"] @@ -14195,13 +14157,7 @@ pub fn vmlal_high_laneq_s16( #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlal_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlal_high_s32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_s32)"] @@ -14216,13 +14172,7 @@ pub fn vmlal_high_laneq_s32( c: int32x4_t, ) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlal_high_s32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmlal_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u16)"] @@ -14237,26 +14187,7 @@ pub fn vmlal_high_lane_u16( c: uint16x4_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlal_high_u16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u16)"] @@ -14271,26 +14202,7 @@ pub fn vmlal_high_laneq_u16( c: uint16x8_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlal_high_u16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmlal_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_lane_u32)"] @@ -14305,13 +14217,7 @@ pub fn vmlal_high_lane_u32( c: uint32x2_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlal_high_u32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_laneq_u32)"] @@ -14326,13 +14232,7 @@ pub fn vmlal_high_laneq_u32( c: uint32x4_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlal_high_u32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmlal_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } } #[doc = "Multiply-add long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlal_high_n_s16)"] @@ -14475,26 +14375,7 @@ pub fn vmlsq_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t) -> float64x2_t #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_lane_s16(a: int32x4_t, b: int16x8_t, c: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlsl_high_s16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s16)"] @@ -14509,26 +14390,7 @@ pub fn vmlsl_high_laneq_s16( c: int16x8_t, ) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlsl_high_s16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmlsl_high_s16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_s32)"] @@ -14539,13 +14401,7 @@ pub fn vmlsl_high_laneq_s16( #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmlsl_high_lane_s32(a: int64x2_t, b: int32x4_t, c: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlsl_high_s32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_s32)"] @@ -14560,13 +14416,7 @@ pub fn vmlsl_high_laneq_s32( c: int32x4_t, ) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlsl_high_s32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmlsl_high_s32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u16)"] @@ -14581,26 +14431,7 @@ pub fn vmlsl_high_lane_u16( c: uint16x4_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlsl_high_u16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u16)"] @@ -14615,26 +14446,7 @@ pub fn vmlsl_high_laneq_u16( c: uint16x8_t, ) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmlsl_high_u16( - a, - b, - simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmlsl_high_u16(a, b, simd_shuffle!(c, c, [LANE as u32; 8])) } } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_lane_u32)"] @@ -14649,13 +14461,7 @@ pub fn vmlsl_high_lane_u32( c: uint32x2_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - vmlsl_high_u32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_laneq_u32)"] @@ -14670,13 +14476,7 @@ pub fn vmlsl_high_laneq_u32( c: uint32x4_t, ) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmlsl_high_u32( - a, - b, - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmlsl_high_u32(a, b, simd_shuffle!(c, c, [LANE as u32; 4])) } } #[doc = "Multiply-subtract long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmlsl_high_n_s16)"] @@ -14975,12 +14775,7 @@ pub fn vmul_lane_f64(a: float64x1_t, b: float64x1_t) -> float64 #[cfg(not(target_arch = "arm64ec"))] pub fn vmul_laneq_f16(a: float16x4_t, b: float16x8_t) -> float16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - simd_mul( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f16)"] @@ -14992,25 +14787,7 @@ pub fn vmul_laneq_f16(a: float16x4_t, b: float16x8_t) -> float1 #[cfg(not(target_arch = "arm64ec"))] pub fn vmulq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - simd_mul( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 8])) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"] @@ -15104,25 +14881,7 @@ pub fn vmulh_laneq_f16(a: f16, b: float16x8_t) -> f16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmull_high_s16( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s16)"] @@ -15133,25 +14892,7 @@ pub fn vmull_high_lane_s16(a: int16x8_t, b: int16x4_t) -> int32 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmull_high_s16( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmull_high_s16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_s32)"] @@ -15162,12 +14903,7 @@ pub fn vmull_high_laneq_s16(a: int16x8_t, b: int16x8_t) -> int3 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - vmull_high_s32( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_s32)"] @@ -15178,12 +14914,7 @@ pub fn vmull_high_lane_s32(a: int32x4_t, b: int32x2_t) -> int64 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmull_high_s32( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmull_high_s32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u16)"] @@ -15194,25 +14925,7 @@ pub fn vmull_high_laneq_s32(a: int32x4_t, b: int32x4_t) -> int6 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmull_high_u16( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u16)"] @@ -15223,25 +14936,7 @@ pub fn vmull_high_lane_u16(a: uint16x8_t, b: uint16x4_t) -> uin #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmull_high_u16( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmull_high_u16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_lane_u32)"] @@ -15252,12 +14947,7 @@ pub fn vmull_high_laneq_u16(a: uint16x8_t, b: uint16x8_t) -> ui #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - vmull_high_u32( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_laneq_u32)"] @@ -15268,12 +14958,7 @@ pub fn vmull_high_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uin #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmull_high_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmull_high_u32( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmull_high_u32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Multiply long"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmull_high_n_s16)"] @@ -15436,7 +15121,7 @@ pub fn vmull_p64(a: p64, b: p64) -> p128 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { static_assert!(LANE == 0); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"] @@ -15447,7 +15132,7 @@ pub fn vmulq_lane_f64(a: float64x2_t, b: float64x1_t) -> float6 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } + unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) } } #[doc = "Floating-point multiply"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"] @@ -15599,12 +15284,7 @@ pub fn vmulxq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { #[cfg(not(target_arch = "arm64ec"))] pub fn vmulx_lane_f16(a: float16x4_t, b: float16x4_t) -> float16x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmulx_f16( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmulx_f16(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f16)"] @@ -15616,12 +15296,7 @@ pub fn vmulx_lane_f16(a: float16x4_t, b: float16x4_t) -> float1 #[cfg(not(target_arch = "arm64ec"))] pub fn vmulx_laneq_f16(a: float16x4_t, b: float16x8_t) -> float16x4_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmulx_f16( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmulx_f16(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f16)"] @@ -15633,25 +15308,7 @@ pub fn vmulx_laneq_f16(a: float16x4_t, b: float16x8_t) -> float #[cfg(not(target_arch = "arm64ec"))] pub fn vmulxq_lane_f16(a: float16x8_t, b: float16x4_t) -> float16x8_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmulxq_f16( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmulxq_f16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f16)"] @@ -15663,25 +15320,7 @@ pub fn vmulxq_lane_f16(a: float16x8_t, b: float16x4_t) -> float #[cfg(not(target_arch = "arm64ec"))] pub fn vmulxq_laneq_f16(a: float16x8_t, b: float16x8_t) -> float16x8_t { static_assert_uimm_bits!(LANE, 3); - unsafe { - vmulxq_f16( - a, - simd_shuffle!( - b, - b, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ), - ) - } + unsafe { vmulxq_f16(a, simd_shuffle!(b, b, [LANE as u32; 8])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f32)"] @@ -15692,7 +15331,7 @@ pub fn vmulxq_laneq_f16(a: float16x8_t, b: float16x8_t) -> floa #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } + unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32; 2])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_laneq_f32)"] @@ -15703,7 +15342,7 @@ pub fn vmulx_lane_f32(a: float32x2_t, b: float32x2_t) -> float3 #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { static_assert_uimm_bits!(LANE, 2); - unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } + unsafe { vmulx_f32(a, simd_shuffle!(b, b, [LANE as u32; 2])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_lane_f32)"] @@ -15714,12 +15353,7 @@ pub fn vmulx_laneq_f32(a: float32x2_t, b: float32x4_t) -> float #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 1); - unsafe { - vmulxq_f32( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f32)"] @@ -15730,12 +15364,7 @@ pub fn vmulxq_lane_f32(a: float32x4_t, b: float32x2_t) -> float #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { static_assert_uimm_bits!(LANE, 2); - unsafe { - vmulxq_f32( - a, - simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]), - ) - } + unsafe { vmulxq_f32(a, simd_shuffle!(b, b, [LANE as u32; 4])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulxq_laneq_f64)"] @@ -15746,7 +15375,7 @@ pub fn vmulxq_laneq_f32(a: float32x4_t, b: float32x4_t) -> floa #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxq_laneq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t { static_assert_uimm_bits!(LANE, 1); - unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } + unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32; 2])) } } #[doc = "Floating-point multiply extended"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulx_lane_f64)"] @@ -15916,7 +15545,7 @@ pub fn vmulxh_laneq_f16(a: f16, b: float16x8_t) -> f16 { #[stable(feature = "neon_intrinsics", since = "1.59.0")] pub fn vmulxq_lane_f64(a: float64x2_t, b: float64x1_t) -> float64x2_t { static_assert!(LANE == 0); - unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) } + unsafe { vmulxq_f64(a, simd_shuffle!(b, b, [LANE as u32; 2])) } } #[doc = "Negate"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vneg_f64)"] @@ -17916,8 +17545,7 @@ pub fn vqnegd_s64(a: i64) -> i64 { pub fn vqrdmlah_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); unsafe { - let c: int16x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); vqrdmlah_s16(a, b, c) } } @@ -17931,7 +17559,7 @@ pub fn vqrdmlah_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4 pub fn vqrdmlah_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); unsafe { - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]); vqrdmlah_s32(a, b, c) } } @@ -17945,8 +17573,7 @@ pub fn vqrdmlah_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2 pub fn vqrdmlah_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 3); unsafe { - let c: int16x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); vqrdmlah_s16(a, b, c) } } @@ -17960,7 +17587,7 @@ pub fn vqrdmlah_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x pub fn vqrdmlah_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); unsafe { - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]); vqrdmlah_s32(a, b, c) } } @@ -17974,20 +17601,7 @@ pub fn vqrdmlah_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x pub fn vqrdmlahq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); unsafe { - let c: int16x8_t = simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ); + let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]); vqrdmlahq_s16(a, b, c) } } @@ -18001,8 +17615,7 @@ pub fn vqrdmlahq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x pub fn vqrdmlahq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); unsafe { - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); vqrdmlahq_s32(a, b, c) } } @@ -18016,20 +17629,7 @@ pub fn vqrdmlahq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x pub fn vqrdmlahq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); unsafe { - let c: int16x8_t = simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ); + let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]); vqrdmlahq_s16(a, b, c) } } @@ -18043,8 +17643,7 @@ pub fn vqrdmlahq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16 pub fn vqrdmlahq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); unsafe { - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); vqrdmlahq_s32(a, b, c) } } @@ -18190,8 +17789,7 @@ pub fn vqrdmlahs_s32(a: i32, b: i32, c: i32) -> i32 { pub fn vqrdmlsh_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 2); unsafe { - let c: int16x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); vqrdmlsh_s16(a, b, c) } } @@ -18205,7 +17803,7 @@ pub fn vqrdmlsh_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4 pub fn vqrdmlsh_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 1); unsafe { - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]); vqrdmlsh_s32(a, b, c) } } @@ -18219,8 +17817,7 @@ pub fn vqrdmlsh_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2 pub fn vqrdmlsh_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { static_assert_uimm_bits!(LANE, 3); unsafe { - let c: int16x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let c: int16x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); vqrdmlsh_s16(a, b, c) } } @@ -18234,7 +17831,7 @@ pub fn vqrdmlsh_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x pub fn vqrdmlsh_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { static_assert_uimm_bits!(LANE, 2); unsafe { - let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]); + let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32; 2]); vqrdmlsh_s32(a, b, c) } } @@ -18248,20 +17845,7 @@ pub fn vqrdmlsh_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x pub fn vqrdmlshq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 2); unsafe { - let c: int16x8_t = simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ); + let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]); vqrdmlshq_s16(a, b, c) } } @@ -18275,8 +17859,7 @@ pub fn vqrdmlshq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x pub fn vqrdmlshq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 1); unsafe { - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); vqrdmlshq_s32(a, b, c) } } @@ -18290,20 +17873,7 @@ pub fn vqrdmlshq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x pub fn vqrdmlshq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(LANE, 3); unsafe { - let c: int16x8_t = simd_shuffle!( - c, - c, - [ - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32, - LANE as u32 - ] - ); + let c: int16x8_t = simd_shuffle!(c, c, [LANE as u32; 8]); vqrdmlshq_s16(a, b, c) } } @@ -18317,8 +17887,7 @@ pub fn vqrdmlshq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16 pub fn vqrdmlshq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { static_assert_uimm_bits!(LANE, 2); unsafe { - let c: int32x4_t = - simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]); + let c: int32x4_t = simd_shuffle!(c, c, [LANE as u32; 4]); vqrdmlshq_s32(a, b, c) } } @@ -25039,16 +24608,9 @@ pub unsafe fn vst1q_lane_f64(a: *mut f64, b: float64x2_t) { #[inline(always)] #[target_feature(enable = "neon")] #[stable(feature = "neon_intrinsics", since = "1.59.0")] -#[cfg_attr(test, assert_instr(st1))] +#[cfg_attr(test, assert_instr(stp))] pub unsafe fn vst2_f64(a: *mut f64, b: float64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v1f64.p0" - )] - fn _vst2_f64(a: float64x1_t, b: float64x1_t, ptr: *mut i8); - } - _vst2_f64(b.0, b.1, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_lane_f64)"] @@ -25125,14 +24687,7 @@ pub unsafe fn vst2_lane_u64(a: *mut u64, b: uint64x1x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v2f64.p0" - )] - fn _vst2q_f64(a: float64x2_t, b: float64x2_t, ptr: *mut i8); - } - _vst2q_f64(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(f64, 2, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s64)"] @@ -25143,14 +24698,7 @@ pub unsafe fn vst2q_f64(a: *mut f64, b: float64x2x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_s64(a: *mut i64, b: int64x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v2i64.p0" - )] - fn _vst2q_s64(a: int64x2_t, b: int64x2_t, ptr: *mut i8); - } - _vst2q_s64(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i64, 2, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_lane_f64)"] @@ -25295,14 +24843,7 @@ pub unsafe fn vst2q_u64(a: *mut u64, b: uint64x2x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst3_f64(a: *mut f64, b: float64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v1f64.p0" - )] - fn _vst3_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, ptr: *mut i8); - } - _vst3_f64(b.0, b.1, b.2, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_lane_f64)"] @@ -25379,14 +24920,7 @@ pub unsafe fn vst3_lane_u64(a: *mut u64, b: uint64x1x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v2f64.p0" - )] - fn _vst3q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, ptr: *mut i8); - } - _vst3q_f64(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(f64, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s64)"] @@ -25397,14 +24931,7 @@ pub unsafe fn vst3q_f64(a: *mut f64, b: float64x2x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st3))] pub unsafe fn vst3q_s64(a: *mut i64, b: int64x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v2i64.p0" - )] - fn _vst3q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i8); - } - _vst3q_s64(b.0, b.1, b.2, a as _) + crate::core_arch::macros::interleaving_store!(i64, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_lane_f64)"] @@ -25549,14 +25076,7 @@ pub unsafe fn vst3q_u64(a: *mut u64, b: uint64x2x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst4_f64(a: *mut f64, b: float64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v1f64.p0" - )] - fn _vst4_f64(a: float64x1_t, b: float64x1_t, c: float64x1_t, d: float64x1_t, ptr: *mut i8); - } - _vst4_f64(b.0, b.1, b.2, b.3, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f64)"] @@ -25647,14 +25167,7 @@ pub unsafe fn vst4_lane_u64(a: *mut u64, b: uint64x1x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v2f64.p0" - )] - fn _vst4q_f64(a: float64x2_t, b: float64x2_t, c: float64x2_t, d: float64x2_t, ptr: *mut i8); - } - _vst4q_f64(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(f64, 2, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s64)"] @@ -25665,14 +25178,7 @@ pub unsafe fn vst4q_f64(a: *mut f64, b: float64x2x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_s64(a: *mut i64, b: int64x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v2i64.p0" - )] - fn _vst4q_s64(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i8); - } - _vst4q_s64(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i64, 2, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_lane_f64)"] diff --git a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs index 2df4ba7443314..2fbd2255aa0fd 100644 --- a/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs +++ b/library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs @@ -1050,6 +1050,14 @@ mod tests { test_vld1q_f16_x2(f16, 16, float16x8x2_t, vst1q_f16_x2, vld1q_f16_x2); test_vld1q_f16_x3(f16, 24, float16x8x3_t, vst1q_f16_x3, vld1q_f16_x3); test_vld1q_f16_x4(f16, 32, float16x8x4_t, vst1q_f16_x4, vld1q_f16_x4); + + test_vld2_f16_x2(f16, 8, float16x4x2_t, vst2_f16, vld2_f16); + test_vld2_f16_x3(f16, 12, float16x4x3_t, vst3_f16, vld3_f16); + test_vld2_f16_x4(f16, 16, float16x4x4_t, vst4_f16, vld4_f16); + + test_vld2q_f16_x2(f16, 16, float16x8x2_t, vst2q_f16, vld2q_f16); + test_vld3q_f16_x3(f16, 24, float16x8x3_t, vst3q_f16, vld3q_f16); + test_vld4q_f16_x4(f16, 32, float16x8x4_t, vst4q_f16, vld4q_f16); } macro_rules! wide_store_load_roundtrip_aes { diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs index 45c83b880e907..a7c33917a88cc 100644 --- a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -10149,7 +10149,7 @@ pub fn vdotq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { #[cfg(not(target_arch = "arm64ec"))] pub fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_f16)"] @@ -10174,13 +10174,7 @@ pub fn vdup_lane_f16(a: float16x4_t) -> float16x4_t { #[cfg(not(target_arch = "arm64ec"))] pub fn vdupq_lane_f16(a: float16x4_t) -> float16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_f32)"] @@ -10341,7 +10335,7 @@ pub fn vdupq_lane_u32(a: uint32x2_t) -> uint32x4_t { )] pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s16)"] @@ -10364,7 +10358,7 @@ pub fn vdup_lane_p16(a: poly16x4_t) -> poly16x4_t { )] pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u16)"] @@ -10387,7 +10381,7 @@ pub fn vdup_lane_s16(a: int16x4_t) -> int16x4_t { )] pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { static_assert_uimm_bits!(N, 2); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p16)"] @@ -10410,13 +10404,7 @@ pub fn vdup_lane_u16(a: uint16x4_t) -> uint16x4_t { )] pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s16)"] @@ -10439,13 +10427,7 @@ pub fn vdupq_lane_p16(a: poly16x4_t) -> poly16x8_t { )] pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u16)"] @@ -10468,13 +10450,7 @@ pub fn vdupq_lane_s16(a: int16x4_t) -> int16x8_t { )] pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { static_assert_uimm_bits!(N, 2); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_p8)"] @@ -10497,13 +10473,7 @@ pub fn vdupq_lane_u16(a: uint16x4_t) -> uint16x8_t { )] pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s8)"] @@ -10526,13 +10496,7 @@ pub fn vdup_lane_p8(a: poly8x8_t) -> poly8x8_t { )] pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_u8)"] @@ -10555,13 +10519,7 @@ pub fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { )] pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_p8)"] @@ -10584,16 +10542,7 @@ pub fn vdup_lane_u8(a: uint8x8_t) -> uint8x8_t { )] pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [ - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 - ] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_s8)"] @@ -10616,16 +10565,7 @@ pub fn vdupq_lane_p8(a: poly8x8_t) -> poly8x16_t { )] pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [ - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 - ] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_lane_u8)"] @@ -10648,16 +10588,7 @@ pub fn vdupq_lane_s8(a: int8x8_t) -> int8x16_t { )] pub fn vdupq_lane_u8(a: uint8x8_t) -> uint8x16_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [ - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 - ] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_lane_s64)"] @@ -10728,7 +10659,7 @@ pub fn vdup_lane_u64(a: uint64x1_t) -> uint64x1_t { #[cfg(not(target_arch = "arm64ec"))] pub fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_f16)"] @@ -10753,13 +10684,7 @@ pub fn vdup_laneq_f16(a: float16x8_t) -> float16x4_t { #[cfg(not(target_arch = "arm64ec"))] pub fn vdupq_laneq_f16(a: float16x8_t) -> float16x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_f32)"] @@ -10920,7 +10845,7 @@ pub fn vdupq_laneq_u32(a: uint32x4_t) -> uint32x4_t { )] pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s16)"] @@ -10943,7 +10868,7 @@ pub fn vdup_laneq_p16(a: poly16x8_t) -> poly16x4_t { )] pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u16)"] @@ -10966,7 +10891,7 @@ pub fn vdup_laneq_s16(a: int16x8_t) -> int16x4_t { )] pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { static_assert_uimm_bits!(N, 3); - unsafe { simd_shuffle!(a, a, [N as u32, N as u32, N as u32, N as u32]) } + unsafe { simd_shuffle!(a, a, [N as u32; 4]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p16)"] @@ -10989,13 +10914,7 @@ pub fn vdup_laneq_u16(a: uint16x8_t) -> uint16x4_t { )] pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s16)"] @@ -11018,13 +10937,7 @@ pub fn vdupq_laneq_p16(a: poly16x8_t) -> poly16x8_t { )] pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u16)"] @@ -11047,13 +10960,7 @@ pub fn vdupq_laneq_s16(a: int16x8_t) -> int16x8_t { )] pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { static_assert_uimm_bits!(N, 3); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_p8)"] @@ -11076,13 +10983,7 @@ pub fn vdupq_laneq_u16(a: uint16x8_t) -> uint16x8_t { )] pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s8)"] @@ -11105,13 +11006,7 @@ pub fn vdup_laneq_p8(a: poly8x16_t) -> poly8x8_t { )] pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_u8)"] @@ -11134,13 +11029,7 @@ pub fn vdup_laneq_s8(a: int8x16_t) -> int8x8_t { )] pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - a, - [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 8]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_p8)"] @@ -11163,16 +11052,7 @@ pub fn vdup_laneq_u8(a: uint8x16_t) -> uint8x8_t { )] pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - a, - [ - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 - ] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_s8)"] @@ -11195,16 +11075,7 @@ pub fn vdupq_laneq_p8(a: poly8x16_t) -> poly8x16_t { )] pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - a, - [ - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 - ] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdupq_laneq_u8)"] @@ -11227,16 +11098,7 @@ pub fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { )] pub fn vdupq_laneq_u8(a: uint8x16_t) -> uint8x16_t { static_assert_uimm_bits!(N, 4); - unsafe { - simd_shuffle!( - a, - a, - [ - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, - N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32 - ] - ) - } + unsafe { simd_shuffle!(a, a, [N as u32; 16]) } } #[doc = "Set all vector lanes to the same value"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdup_laneq_s64)"] @@ -35894,7 +35756,7 @@ pub fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { static_assert_uimm_bits!(N, 2); unsafe { - let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]); + let b: int16x4_t = simd_shuffle!(b, b, [N as u32; 4]); vqdmull_s16(a, b) } } @@ -35920,7 +35782,7 @@ pub fn vqdmull_lane_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { pub fn vqdmull_lane_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { static_assert_uimm_bits!(N, 1); unsafe { - let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]); + let b: int32x2_t = simd_shuffle!(b, b, [N as u32; 2]); vqdmull_s32(a, b) } } @@ -37480,17 +37342,7 @@ pub fn vqrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")] fn _vqrshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; } - unsafe { - _vqrshrn_n_u16( - a, - const { - uint16x8_t([ - -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, - -N as u16, - ]) - }, - ) - } + unsafe { _vqrshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } } #[doc = "Unsigned signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u32)"] @@ -37506,12 +37358,7 @@ pub fn vqrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")] fn _vqrshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; } - unsafe { - _vqrshrn_n_u32( - a, - const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }, - ) - } + unsafe { _vqrshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } } #[doc = "Unsigned signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u64)"] @@ -37527,7 +37374,7 @@ pub fn vqrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")] fn _vqrshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; } - unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64, -N as u64]) }) } + unsafe { _vqrshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } } #[doc = "Unsigned signed saturating rounded shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqrshrn_n_u16)"] @@ -38922,17 +38769,7 @@ pub fn vqshrn_n_u16(a: uint16x8_t) -> uint8x8_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")] fn _vqshrn_n_u16(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t; } - unsafe { - _vqshrn_n_u16( - a, - const { - uint16x8_t([ - -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, - -N as u16, - ]) - }, - ) - } + unsafe { _vqshrn_n_u16(a, const { uint16x8_t([-N as u16; 8]) }) } } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u32)"] @@ -38948,12 +38785,7 @@ pub fn vqshrn_n_u32(a: uint32x4_t) -> uint16x4_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")] fn _vqshrn_n_u32(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t; } - unsafe { - _vqshrn_n_u32( - a, - const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }, - ) - } + unsafe { _vqshrn_n_u32(a, const { uint32x4_t([-N as u32; 4]) }) } } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u64)"] @@ -38969,7 +38801,7 @@ pub fn vqshrn_n_u64(a: uint64x2_t) -> uint32x2_t { #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")] fn _vqshrn_n_u64(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t; } - unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64, -N as u64]) }) } + unsafe { _vqshrn_n_u64(a, const { uint64x2_t([-N as u64; 2]) }) } } #[doc = "Unsigned saturating shift right narrow"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqshrn_n_u16)"] @@ -66001,14 +65833,7 @@ pub unsafe fn vst2q_f16(a: *mut f16, b: float16x8x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v2f32.p0" - )] - fn _vst2_f32(a: float32x2_t, b: float32x2_t, ptr: *mut i8); - } - _vst2_f32(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(f32, 2, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_f32)"] @@ -66020,14 +65845,7 @@ pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v4f32.p0" - )] - fn _vst2q_f32(a: float32x4_t, b: float32x4_t, ptr: *mut i8); - } - _vst2q_f32(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(f32, 4, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s8)"] @@ -66039,14 +65857,7 @@ pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v8i8.p0" - )] - fn _vst2_s8(a: int8x8_t, b: int8x8_t, ptr: *mut i8); - } - _vst2_s8(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i8, 8, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s8)"] @@ -66058,14 +65869,7 @@ pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v16i8.p0" - )] - fn _vst2q_s8(a: int8x16_t, b: int8x16_t, ptr: *mut i8); - } - _vst2q_s8(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i8, 16, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s16)"] @@ -66077,14 +65881,7 @@ pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v4i16.p0" - )] - fn _vst2_s16(a: int16x4_t, b: int16x4_t, ptr: *mut i8); - } - _vst2_s16(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i16, 4, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s16)"] @@ -66096,14 +65893,7 @@ pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v8i16.p0" - )] - fn _vst2q_s16(a: int16x8_t, b: int16x8_t, ptr: *mut i8); - } - _vst2q_s16(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i16, 8, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s32)"] @@ -66115,14 +65905,7 @@ pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v2i32.p0" - )] - fn _vst2_s32(a: int32x2_t, b: int32x2_t, ptr: *mut i8); - } - _vst2_s32(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i32, 2, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2q_s32)"] @@ -66134,14 +65917,7 @@ pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st2))] pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v4i32.p0" - )] - fn _vst2q_s32(a: int32x4_t, b: int32x4_t, ptr: *mut i8); - } - _vst2q_s32(b.0, b.1, a as _) + crate::core_arch::macros::interleaving_store!(i32, 4, 2, a, b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_f32)"] @@ -66865,11 +66641,7 @@ pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.v1i64.p0")] - fn _vst2_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32); - } - _vst2_s64(a as _, b.0, b.1, 8) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_s64)"] @@ -66881,14 +66653,7 @@ pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st2.v1i64.p0" - )] - fn _vst2_s64(a: int64x1_t, b: int64x1_t, ptr: *mut i8); - } - _vst2_s64(b.0, b.1, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 2-element structures from two registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst2_u64)"] @@ -67233,11 +66998,7 @@ pub unsafe fn vst3q_f16(a: *mut f16, b: float16x8x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2f32")] - fn _vst3_f32(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32); - } - _vst3_f32(a as _, b.0, b.1, b.2, 4) + crate::core_arch::macros::interleaving_store!(f32, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_f32)"] @@ -67249,11 +67010,7 @@ pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4f32")] - fn _vst3q_f32(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32); - } - _vst3q_f32(a as _, b.0, b.1, b.2, 4) + crate::core_arch::macros::interleaving_store!(f32, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s8)"] @@ -67265,11 +67022,7 @@ pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i8")] - fn _vst3_s8(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32); - } - _vst3_s8(a as _, b.0, b.1, b.2, 1) + crate::core_arch::macros::interleaving_store!(i8, 8, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s8)"] @@ -67281,11 +67034,7 @@ pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v16i8")] - fn _vst3q_s8(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32); - } - _vst3q_s8(a as _, b.0, b.1, b.2, 1) + crate::core_arch::macros::interleaving_store!(i8, 16, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s16)"] @@ -67297,11 +67046,7 @@ pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i16")] - fn _vst3_s16(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32); - } - _vst3_s16(a as _, b.0, b.1, b.2, 2) + crate::core_arch::macros::interleaving_store!(i16, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s16)"] @@ -67313,11 +67058,7 @@ pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v8i16")] - fn _vst3q_s16(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32); - } - _vst3q_s16(a as _, b.0, b.1, b.2, 2) + crate::core_arch::macros::interleaving_store!(i16, 8, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s32)"] @@ -67329,11 +67070,7 @@ pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v2i32")] - fn _vst3_s32(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32); - } - _vst3_s32(a as _, b.0, b.1, b.2, 4) + crate::core_arch::macros::interleaving_store!(i32, 2, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3q_s32)"] @@ -67345,11 +67082,7 @@ pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(vst3))] pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v4i32")] - fn _vst3q_s32(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32); - } - _vst3q_s32(a as _, b.0, b.1, b.2, 4) + crate::core_arch::macros::interleaving_store!(i32, 4, 3, a, b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_f32)"] @@ -68153,14 +67886,7 @@ pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st3.v1i64.p0" - )] - fn _vst3_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8); - } - _vst3_s64(b.0, b.1, b.2, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_s64)"] @@ -68172,11 +67898,7 @@ pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0.v1i64")] - fn _vst3_s64(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32); - } - _vst3_s64(a as _, b.0, b.1, b.2, 8) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 3-element structures from three registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst3_u64)"] @@ -68712,14 +68434,7 @@ pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v2f32.p0" - )] - fn _vst4_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8); - } - _vst4_f32(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(f32, 2, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_f32)"] @@ -68731,14 +68446,7 @@ pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v4f32.p0" - )] - fn _vst4q_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8); - } - _vst4q_f32(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(f32, 4, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s8)"] @@ -68750,14 +68458,7 @@ pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v8i8.p0" - )] - fn _vst4_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); - } - _vst4_s8(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i8, 8, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s8)"] @@ -68769,14 +68470,7 @@ pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v16i8.p0" - )] - fn _vst4q_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); - } - _vst4q_s8(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i8, 16, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s16)"] @@ -68788,14 +68482,7 @@ pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v4i16.p0" - )] - fn _vst4_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8); - } - _vst4_s16(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i16, 4, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s16)"] @@ -68807,14 +68494,7 @@ pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v8i16.p0" - )] - fn _vst4q_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8); - } - _vst4q_s16(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i16, 8, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s32)"] @@ -68826,14 +68506,7 @@ pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v2i32.p0" - )] - fn _vst4_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8); - } - _vst4_s32(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i32, 2, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4q_s32)"] @@ -68845,14 +68518,7 @@ pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(st4))] pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v4i32.p0" - )] - fn _vst4q_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8); - } - _vst4q_s32(b.0, b.1, b.2, b.3, a as _) + crate::core_arch::macros::interleaving_store!(i32, 4, 4, a, b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_lane_f16)"] @@ -69576,18 +69242,7 @@ pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) { #[unstable(feature = "stdarch_arm_neon_intrinsics", issue = "111800")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0.v1i64")] - fn _vst4_s64( - ptr: *mut i8, - a: int64x1_t, - b: int64x1_t, - c: int64x1_t, - d: int64x1_t, - size: i32, - ); - } - _vst4_s64(a as _, b.0, b.1, b.2, b.3, 8) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_s64)"] @@ -69599,14 +69254,7 @@ pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { #[stable(feature = "neon_intrinsics", since = "1.59.0")] #[cfg_attr(test, assert_instr(nop))] pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { - unsafe extern "unadjusted" { - #[cfg_attr( - any(target_arch = "aarch64", target_arch = "arm64ec"), - link_name = "llvm.aarch64.neon.st4.v1i64.p0" - )] - fn _vst4_s64(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8); - } - _vst4_s64(b.0, b.1, b.2, b.3, a as _) + core::ptr::write_unaligned(a.cast(), b) } #[doc = "Store multiple 4-element structures from four registers"] #[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vst4_u64)"] diff --git a/library/stdarch/crates/core_arch/src/macros.rs b/library/stdarch/crates/core_arch/src/macros.rs index 9f6922efeeb7d..00e92428b3e7e 100644 --- a/library/stdarch/crates/core_arch/src/macros.rs +++ b/library/stdarch/crates/core_arch/src/macros.rs @@ -187,6 +187,17 @@ macro_rules! simd_masked_store { }; } +/// The first N indices `[0, 1, 2, ...]`. +pub(crate) const fn identity() -> [u32; N] { + let mut out = [0u32; N]; + let mut i = 0usize; + while i < N { + out[i] = i as u32; + i += 1; + } + out +} + /// The first N even indices `[0, 2, 4, ...]`. pub(crate) const fn even() -> [u32; N] { let mut out = [0u32; N]; @@ -226,12 +237,12 @@ macro_rules! deinterleaving_load { ($elem:ty, $lanes:literal, 2, $ptr:expr) => {{ use $crate::core_arch::macros::deinterleave_mask; use $crate::core_arch::simd::Simd; - use $crate::{mem::transmute, ptr}; + use $crate::mem::transmute; type V = Simd<$elem, $lanes>; type W = Simd<$elem, { $lanes * 2 }>; - let w: W = ptr::read_unaligned($ptr as *const W); + let w: W = $crate::ptr::read_unaligned($ptr as *const W); let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 2, 0>()); let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 2, 1>()); @@ -242,12 +253,20 @@ macro_rules! deinterleaving_load { ($elem:ty, $lanes:literal, 3, $ptr:expr) => {{ use $crate::core_arch::macros::deinterleave_mask; use $crate::core_arch::simd::Simd; - use $crate::{mem::transmute, ptr}; + use $crate::mem::{MaybeUninit, transmute}; type V = Simd<$elem, $lanes>; type W = Simd<$elem, { $lanes * 3 }>; - let w: W = ptr::read_unaligned($ptr as *const W); + // NOTE: repr(simd) adds padding to make the total size a power of two. + // Hence reading W from ptr might read out of bounds. + let mut mem = MaybeUninit::::uninit(); + $crate::ptr::copy_nonoverlapping( + $ptr.cast::<$elem>(), + mem.as_mut_ptr().cast::<$elem>(), + $lanes * 3, + ); + let w = mem.assume_init(); let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>()); let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>()); @@ -259,12 +278,12 @@ macro_rules! deinterleaving_load { ($elem:ty, $lanes:literal, 4, $ptr:expr) => {{ use $crate::core_arch::macros::deinterleave_mask; use $crate::core_arch::simd::Simd; - use $crate::{mem::transmute, ptr}; + use $crate::mem::transmute; type V = Simd<$elem, $lanes>; type W = Simd<$elem, { $lanes * 4 }>; - let w: W = ptr::read_unaligned($ptr as *const W); + let w: W = $crate::ptr::read_unaligned($ptr as *const W); let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 4, 0>()); let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 4, 1>()); @@ -277,3 +296,66 @@ macro_rules! deinterleaving_load { #[allow(unused)] pub(crate) use deinterleaving_load; + +pub(crate) const fn interleave_mask() +-> [u32; LANES] { + let mut out = [0u32; LANES]; + let mut j = 0usize; + while j < LANES { + out[j] = ((j % K) * N + j / K) as u32; + j += 1; + } + out +} + +#[allow(unused)] +macro_rules! interleaving_store { + ($elem:ty, $lanes:literal, 2, $ptr:expr, $v:expr) => {{ + use $crate::core_arch::macros::interleave_mask; + use $crate::core_arch::simd::Simd; + + type W = Simd<$elem, { $lanes * 2 }>; + let w: W = simd_shuffle!($v.0, $v.1, interleave_mask::<{ $lanes * 2 }, $lanes, 2>()); + $crate::ptr::write_unaligned($ptr as *mut W, w); + }}; + + // N = 3 + ($elem:ty, $lanes:literal, 3, $ptr:expr, $v:expr) => {{ + use $crate::core_arch::macros::{identity, interleave_mask}; + use $crate::core_arch::simd::Simd; + + let v0v1: Simd<$elem, { $lanes * 2 }> = + simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>()); + let v2v2: Simd<$elem, { $lanes * 2 }> = + simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>()); + + type W = Simd<$elem, { $lanes * 3 }>; + + // NOTE: repr(simd) adds padding to make the total size a power of two. + // Hence writing W to ptr might write out of bounds. + let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>()); + $crate::ptr::copy_nonoverlapping( + (&w as *const W).cast::<$elem>(), + $ptr.cast::<$elem>(), + $lanes * 3, + ); + }}; + + // N = 4 + ($elem:ty, $lanes:literal, 4, $ptr:expr, $v:expr) => {{ + use $crate::core_arch::macros::{identity, interleave_mask}; + use $crate::core_arch::simd::Simd; + + let v0v1: Simd<$elem, { $lanes * 2 }> = + simd_shuffle!($v.0, $v.1, identity::<{ $lanes * 2 }>()); + let v2v3: Simd<$elem, { $lanes * 2 }> = + simd_shuffle!($v.2, $v.3, identity::<{ $lanes * 2 }>()); + + type W = Simd<$elem, { $lanes * 4 }>; + let w: W = simd_shuffle!(v0v1, v2v3, interleave_mask::<{ $lanes * 4 }, $lanes, 4>()); + $crate::ptr::write_unaligned($ptr as *mut W, w); + }}; +} + +#[allow(unused)] +pub(crate) use interleaving_store; diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs index 04a88e461f752..b49ad9522a412 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -2315,7 +2315,7 @@ pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { unsafe { transmute(simd_or(a.as_i32x8(), b.as_i32x8())) } } -/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi16) @@ -2323,11 +2323,33 @@ pub const fn _mm256_or_si256(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpacksswb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(packsswb(a.as_i16x16(), b.as_i16x16())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = simd_splat(i8::MAX as i16); + let min = simd_splat(i8::MIN as i16); + + let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min) + .as_m256i() + .as_i8x32(); + let clamped_b = simd_imax(simd_imin(b.as_i16x16(), max), min) + .as_m256i() + .as_i8x32(); + + #[rustfmt::skip] + const IDXS: [u32; 32] = [ + 00, 02, 04, 06, 08, 10, 12, 14, // a-lo i16 to i8 conversions + 32, 34, 36, 38, 40, 42, 44, 46, // b-lo + 16, 18, 20, 22, 24, 26, 28, 30, // a-hi + 48, 50, 52, 54, 56, 58, 60, 62, // b-hi + ]; + let result: i8x32 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m256i() + } } -/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32) @@ -2335,11 +2357,33 @@ pub fn _mm256_packs_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackssdw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(packssdw(a.as_i32x8(), b.as_i32x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = simd_splat(i16::MAX as i32); + let min = simd_splat(i16::MIN as i32); + + let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min) + .as_m256i() + .as_i16x16(); + let clamped_b = simd_imax(simd_imin(b.as_i32x8(), max), min) + .as_m256i() + .as_i16x16(); + + #[rustfmt::skip] + const IDXS: [u32; 16] = [ + 00, 02, 04, 06, // a-lo i32 to i16 conversions + 16, 18, 20, 22, // b-lo + 08, 10, 12, 14, // a-hi + 24, 26, 28, 30, // b-hi + ]; + let result: i16x16 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m256i() + } } -/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16) @@ -2347,11 +2391,33 @@ pub fn _mm256_packs_epi32(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackuswb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(packuswb(a.as_i16x16(), b.as_i16x16())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = simd_splat(u8::MAX as i16); + let min = simd_splat(u8::MIN as i16); + + let clamped_a = simd_imax(simd_imin(a.as_i16x16(), max), min) + .as_m256i() + .as_i8x32(); + let clamped_b = simd_imax(simd_imin(b.as_i16x16(), max), min) + .as_m256i() + .as_i8x32(); + + #[rustfmt::skip] + const IDXS: [u32; 32] = [ + 00, 02, 04, 06, 08, 10, 12, 14, // a-lo i16 to u8 conversions + 32, 34, 36, 38, 40, 42, 44, 46, // b-lo + 16, 18, 20, 22, 24, 26, 28, 30, // a-hi + 48, 50, 52, 54, 56, 58, 60, 62, // b-hi + ]; + let result: i8x32 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m256i() + } } -/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi32) @@ -2359,8 +2425,30 @@ pub fn _mm256_packus_epi16(a: __m256i, b: __m256i) -> __m256i { #[target_feature(enable = "avx2")] #[cfg_attr(test, assert_instr(vpackusdw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { - unsafe { transmute(packusdw(a.as_i32x8(), b.as_i32x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_packus_epi32(a: __m256i, b: __m256i) -> __m256i { + unsafe { + let max = simd_splat(u16::MAX as i32); + let min = simd_splat(u16::MIN as i32); + + let clamped_a = simd_imax(simd_imin(a.as_i32x8(), max), min) + .as_m256i() + .as_i16x16(); + let clamped_b = simd_imax(simd_imin(b.as_i32x8(), max), min) + .as_m256i() + .as_i16x16(); + + #[rustfmt::skip] + const IDXS: [u32; 16] = [ + 00, 02, 04, 06, // a-lo i32 to u16 conversions + 16, 18, 20, 22, // b-lo + 08, 10, 12, 14, // a-hi + 24, 26, 28, 30, // b-hi + ]; + let result: i16x16 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m256i() + } } /// Permutes packed 32-bit integers from `a` according to the content of `b`. @@ -3827,14 +3915,6 @@ unsafe extern "C" { fn mpsadbw(a: u8x32, b: u8x32, imm8: i8) -> u16x16; #[link_name = "llvm.x86.avx2.pmul.hr.sw"] fn pmulhrsw(a: i16x16, b: i16x16) -> i16x16; - #[link_name = "llvm.x86.avx2.packsswb"] - fn packsswb(a: i16x16, b: i16x16) -> i8x32; - #[link_name = "llvm.x86.avx2.packssdw"] - fn packssdw(a: i32x8, b: i32x8) -> i16x16; - #[link_name = "llvm.x86.avx2.packuswb"] - fn packuswb(a: i16x16, b: i16x16) -> u8x32; - #[link_name = "llvm.x86.avx2.packusdw"] - fn packusdw(a: i32x8, b: i32x8) -> u16x16; #[link_name = "llvm.x86.avx2.psad.bw"] fn psadbw(a: u8x32, b: u8x32) -> u64x4; #[link_name = "llvm.x86.avx2.psign.b"] @@ -4988,7 +5068,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_packs_epi16() { + const fn test_mm256_packs_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_packs_epi16(a, b); @@ -5004,7 +5084,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_packs_epi32() { + const fn test_mm256_packs_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_packs_epi32(a, b); @@ -5014,7 +5094,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_packus_epi16() { + const fn test_mm256_packus_epi16() { let a = _mm256_set1_epi16(2); let b = _mm256_set1_epi16(4); let r = _mm256_packus_epi16(a, b); @@ -5030,7 +5110,7 @@ mod tests { } #[simd_test(enable = "avx2")] - fn test_mm256_packus_epi32() { + const fn test_mm256_packus_epi32() { let a = _mm256_set1_epi32(2); let b = _mm256_set1_epi32(4); let r = _mm256_packus_epi32(a, b); diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs index 3ba171c0fa50f..b41f8576cfe54 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -6523,8 +6523,34 @@ pub fn _mm_maskz_maddubs_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vpackssdw(a.as_i32x16(), b.as_i32x16())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = simd_splat(i16::MAX as i32); + let min = simd_splat(i16::MIN as i32); + + let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min) + .as_m512i() + .as_i16x32(); + let clamped_b = simd_imax(simd_imin(b.as_i32x16(), max), min) + .as_m512i() + .as_i16x32(); + + #[rustfmt::skip] + const IDXS: [u32; 32] = [ + 00, 02, 04, 06, + 32, 34, 36, 38, + 08, 10, 12, 14, + 40, 42, 44, 46, + 16, 18, 20, 22, + 48, 50, 52, 54, + 24, 26, 28, 30, + 56, 58, 60, 62, + ]; + let result: i16x32 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m512i() + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6534,7 +6560,13 @@ pub fn _mm512_packs_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_packs_epi32( + src: __m512i, + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { unsafe { let pack = _mm512_packs_epi32(a, b).as_i16x32(); transmute(simd_select_bitmask(k, pack, src.as_i16x32())) @@ -6548,7 +6580,8 @@ pub fn _mm512_mask_packs_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { unsafe { let pack = _mm512_packs_epi32(a, b).as_i16x32(); transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) @@ -6562,7 +6595,13 @@ pub fn _mm512_maskz_packs_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub fn _mm256_mask_packs_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_packs_epi32( + src: __m256i, + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { unsafe { let pack = _mm256_packs_epi32(a, b).as_i16x16(); transmute(simd_select_bitmask(k, pack, src.as_i16x16())) @@ -6590,7 +6629,8 @@ pub fn _mm256_maskz_packs_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { unsafe { let pack = _mm_packs_epi32(a, b).as_i16x8(); transmute(simd_select_bitmask(k, pack, src.as_i16x8())) @@ -6604,7 +6644,8 @@ pub fn _mm_mask_packs_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) - #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackssdw))] -pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { unsafe { let pack = _mm_packs_epi32(a, b).as_i16x8(); transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) @@ -6618,8 +6659,34 @@ pub fn _mm_maskz_packs_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vpacksswb(a.as_i16x32(), b.as_i16x32())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = simd_splat(i8::MAX as i16); + let min = simd_splat(i8::MIN as i16); + + let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min) + .as_m512i() + .as_i8x64(); + let clamped_b = simd_imax(simd_imin(b.as_i16x32(), max), min) + .as_m512i() + .as_i8x64(); + + #[rustfmt::skip] + const IDXS: [u32; 64] = [ + 000, 002, 004, 006, 008, 010, 012, 014, + 064, 066, 068, 070, 072, 074, 076, 078, + 016, 018, 020, 022, 024, 026, 028, 030, + 080, 082, 084, 086, 088, 090, 092, 094, + 032, 034, 036, 038, 040, 042, 044, 046, + 096, 098, 100, 102, 104, 106, 108, 110, + 048, 050, 052, 054, 056, 058, 060, 062, + 112, 114, 116, 118, 120, 122, 124, 126, + ]; + let result: i8x64 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m512i() + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using signed saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6629,7 +6696,13 @@ pub fn _mm512_packs_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_packs_epi16( + src: __m512i, + k: __mmask64, + a: __m512i, + b: __m512i, +) -> __m512i { unsafe { let pack = _mm512_packs_epi16(a, b).as_i8x64(); transmute(simd_select_bitmask(k, pack, src.as_i8x64())) @@ -6643,7 +6716,8 @@ pub fn _mm512_mask_packs_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { unsafe { let pack = _mm512_packs_epi16(a, b).as_i8x64(); transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) @@ -6657,7 +6731,13 @@ pub fn _mm512_maskz_packs_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_packs_epi16( + src: __m256i, + k: __mmask32, + a: __m256i, + b: __m256i, +) -> __m256i { unsafe { let pack = _mm256_packs_epi16(a, b).as_i8x32(); transmute(simd_select_bitmask(k, pack, src.as_i8x32())) @@ -6671,7 +6751,8 @@ pub fn _mm256_mask_packs_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { unsafe { let pack = _mm256_packs_epi16(a, b).as_i8x32(); transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) @@ -6685,7 +6766,8 @@ pub fn _mm256_maskz_packs_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { unsafe { let pack = _mm_packs_epi16(a, b).as_i8x16(); transmute(simd_select_bitmask(k, pack, src.as_i8x16())) @@ -6699,7 +6781,8 @@ pub fn _mm_mask_packs_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpacksswb))] -pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { unsafe { let pack = _mm_packs_epi16(a, b).as_i8x16(); transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) @@ -6713,8 +6796,34 @@ pub fn _mm_maskz_packs_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vpackusdw(a.as_i32x16(), b.as_i32x16())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = simd_splat(u16::MAX as i32); + let min = simd_splat(u16::MIN as i32); + + let clamped_a = simd_imax(simd_imin(a.as_i32x16(), max), min) + .as_m512i() + .as_i16x32(); + let clamped_b = simd_imax(simd_imin(b.as_i32x16(), max), min) + .as_m512i() + .as_i16x32(); + + #[rustfmt::skip] + const IDXS: [u32; 32] = [ + 00, 02, 04, 06, + 32, 34, 36, 38, + 08, 10, 12, 14, + 40, 42, 44, 46, + 16, 18, 20, 22, + 48, 50, 52, 54, + 24, 26, 28, 30, + 56, 58, 60, 62, + ]; + let result: i16x32 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m512i() + } } /// Convert packed signed 32-bit integers from a and b to packed 16-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6724,7 +6833,13 @@ pub fn _mm512_packus_epi32(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m512i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_packus_epi32( + src: __m512i, + k: __mmask32, + a: __m512i, + b: __m512i, +) -> __m512i { unsafe { let pack = _mm512_packus_epi32(a, b).as_i16x32(); transmute(simd_select_bitmask(k, pack, src.as_i16x32())) @@ -6738,7 +6853,8 @@ pub fn _mm512_mask_packus_epi32(src: __m512i, k: __mmask32, a: __m512i, b: __m51 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512i { unsafe { let pack = _mm512_packus_epi32(a, b).as_i16x32(); transmute(simd_select_bitmask(k, pack, i16x32::ZERO)) @@ -6752,7 +6868,13 @@ pub fn _mm512_maskz_packus_epi32(k: __mmask32, a: __m512i, b: __m512i) -> __m512 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m256i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_packus_epi32( + src: __m256i, + k: __mmask16, + a: __m256i, + b: __m256i, +) -> __m256i { unsafe { let pack = _mm256_packus_epi32(a, b).as_i16x16(); transmute(simd_select_bitmask(k, pack, src.as_i16x16())) @@ -6766,7 +6888,8 @@ pub fn _mm256_mask_packus_epi32(src: __m256i, k: __mmask16, a: __m256i, b: __m25 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256i { unsafe { let pack = _mm256_packus_epi32(a, b).as_i16x16(); transmute(simd_select_bitmask(k, pack, i16x16::ZERO)) @@ -6780,7 +6903,8 @@ pub fn _mm256_maskz_packus_epi32(k: __mmask16, a: __m256i, b: __m256i) -> __m256 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i { unsafe { let pack = _mm_packus_epi32(a, b).as_i16x8(); transmute(simd_select_bitmask(k, pack, src.as_i16x8())) @@ -6794,7 +6918,8 @@ pub fn _mm_mask_packus_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackusdw))] -pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { unsafe { let pack = _mm_packus_epi32(a, b).as_i16x8(); transmute(simd_select_bitmask(k, pack, i16x8::ZERO)) @@ -6808,8 +6933,34 @@ pub fn _mm_maskz_packus_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i { - unsafe { transmute(vpackuswb(a.as_i16x32(), b.as_i16x32())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i { + unsafe { + let max = simd_splat(u8::MAX as i16); + let min = simd_splat(u8::MIN as i16); + + let clamped_a = simd_imax(simd_imin(a.as_i16x32(), max), min) + .as_m512i() + .as_i8x64(); + let clamped_b = simd_imax(simd_imin(b.as_i16x32(), max), min) + .as_m512i() + .as_i8x64(); + + #[rustfmt::skip] + const IDXS: [u32; 64] = [ + 000, 002, 004, 006, 008, 010, 012, 014, + 064, 066, 068, 070, 072, 074, 076, 078, + 016, 018, 020, 022, 024, 026, 028, 030, + 080, 082, 084, 086, 088, 090, 092, 094, + 032, 034, 036, 038, 040, 042, 044, 046, + 096, 098, 100, 102, 104, 106, 108, 110, + 048, 050, 052, 054, 056, 058, 060, 062, + 112, 114, 116, 118, 120, 122, 124, 126, + ]; + let result: i8x64 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m512i() + } } /// Convert packed signed 16-bit integers from a and b to packed 8-bit integers using unsigned saturation, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). @@ -6819,7 +6970,13 @@ pub fn _mm512_packus_epi16(a: __m512i, b: __m512i) -> __m512i { #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m512i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_mask_packus_epi16( + src: __m512i, + k: __mmask64, + a: __m512i, + b: __m512i, +) -> __m512i { unsafe { let pack = _mm512_packus_epi16(a, b).as_i8x64(); transmute(simd_select_bitmask(k, pack, src.as_i8x64())) @@ -6833,7 +6990,8 @@ pub fn _mm512_mask_packus_epi16(src: __m512i, k: __mmask64, a: __m512i, b: __m51 #[target_feature(enable = "avx512bw")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { unsafe { let pack = _mm512_packus_epi16(a, b).as_i8x64(); transmute(simd_select_bitmask(k, pack, i8x64::ZERO)) @@ -6847,7 +7005,13 @@ pub fn _mm512_maskz_packus_epi16(k: __mmask64, a: __m512i, b: __m512i) -> __m512 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m256i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_mask_packus_epi16( + src: __m256i, + k: __mmask32, + a: __m256i, + b: __m256i, +) -> __m256i { unsafe { let pack = _mm256_packus_epi16(a, b).as_i8x32(); transmute(simd_select_bitmask(k, pack, src.as_i8x32())) @@ -6861,7 +7025,8 @@ pub fn _mm256_mask_packus_epi16(src: __m256i, k: __mmask32, a: __m256i, b: __m25 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { unsafe { let pack = _mm256_packus_epi16(a, b).as_i8x32(); transmute(simd_select_bitmask(k, pack, i8x32::ZERO)) @@ -6875,7 +7040,8 @@ pub fn _mm256_maskz_packus_epi16(k: __mmask32, a: __m256i, b: __m256i) -> __m256 #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) -> __m128i { unsafe { let pack = _mm_packus_epi16(a, b).as_i8x16(); transmute(simd_select_bitmask(k, pack, src.as_i8x16())) @@ -6889,7 +7055,8 @@ pub fn _mm_mask_packus_epi16(src: __m128i, k: __mmask16, a: __m128i, b: __m128i) #[target_feature(enable = "avx512bw,avx512vl")] #[stable(feature = "stdarch_x86_avx512", since = "1.89")] #[cfg_attr(test, assert_instr(vpackuswb))] -pub fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_maskz_packus_epi16(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { unsafe { let pack = _mm_packus_epi16(a, b).as_i8x16(); transmute(simd_select_bitmask(k, pack, i8x16::ZERO)) @@ -12606,15 +12773,6 @@ unsafe extern "C" { #[link_name = "llvm.x86.avx512.pmaddubs.w.512"] fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32; - #[link_name = "llvm.x86.avx512.packssdw.512"] - fn vpackssdw(a: i32x16, b: i32x16) -> i16x32; - #[link_name = "llvm.x86.avx512.packsswb.512"] - fn vpacksswb(a: i16x32, b: i16x32) -> i8x64; - #[link_name = "llvm.x86.avx512.packusdw.512"] - fn vpackusdw(a: i32x16, b: i32x16) -> u16x32; - #[link_name = "llvm.x86.avx512.packuswb.512"] - fn vpackuswb(a: i16x32, b: i16x32) -> u8x64; - #[link_name = "llvm.x86.avx512.psll.w.512"] fn vpsllw(a: i16x32, count: i16x8) -> i16x32; @@ -17702,7 +17860,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_packs_epi32() { + const fn test_mm512_packs_epi32() { let a = _mm512_set1_epi32(i32::MAX); let b = _mm512_set1_epi32(1); let r = _mm512_packs_epi32(a, b); @@ -17713,7 +17871,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_packs_epi32() { + const fn test_mm512_mask_packs_epi32() { let a = _mm512_set1_epi32(i32::MAX); let b = _mm512_set1_epi32(1 << 16 | 1); let r = _mm512_mask_packs_epi32(a, 0, a, b); @@ -17726,7 +17884,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_packs_epi32() { + const fn test_mm512_maskz_packs_epi32() { let a = _mm512_set1_epi32(i32::MAX); let b = _mm512_set1_epi32(1); let r = _mm512_maskz_packs_epi32(0, a, b); @@ -17739,7 +17897,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_packs_epi32() { + const fn test_mm256_mask_packs_epi32() { let a = _mm256_set1_epi32(i32::MAX); let b = _mm256_set1_epi32(1 << 16 | 1); let r = _mm256_mask_packs_epi32(a, 0, a, b); @@ -17763,7 +17921,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_packs_epi32() { + const fn test_mm_mask_packs_epi32() { let a = _mm_set1_epi32(i32::MAX); let b = _mm_set1_epi32(1 << 16 | 1); let r = _mm_mask_packs_epi32(a, 0, a, b); @@ -17774,7 +17932,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_packs_epi32() { + const fn test_mm_maskz_packs_epi32() { let a = _mm_set1_epi32(i32::MAX); let b = _mm_set1_epi32(1); let r = _mm_maskz_packs_epi32(0, a, b); @@ -17785,7 +17943,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_packs_epi16() { + const fn test_mm512_packs_epi16() { let a = _mm512_set1_epi16(i16::MAX); let b = _mm512_set1_epi16(1); let r = _mm512_packs_epi16(a, b); @@ -17798,7 +17956,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_packs_epi16() { + const fn test_mm512_mask_packs_epi16() { let a = _mm512_set1_epi16(i16::MAX); let b = _mm512_set1_epi16(1 << 8 | 1); let r = _mm512_mask_packs_epi16(a, 0, a, b); @@ -17818,7 +17976,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_packs_epi16() { + const fn test_mm512_maskz_packs_epi16() { let a = _mm512_set1_epi16(i16::MAX); let b = _mm512_set1_epi16(1); let r = _mm512_maskz_packs_epi16(0, a, b); @@ -17837,7 +17995,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_packs_epi16() { + const fn test_mm256_mask_packs_epi16() { let a = _mm256_set1_epi16(i16::MAX); let b = _mm256_set1_epi16(1 << 8 | 1); let r = _mm256_mask_packs_epi16(a, 0, a, b); @@ -17850,7 +18008,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_packs_epi16() { + const fn test_mm256_maskz_packs_epi16() { let a = _mm256_set1_epi16(i16::MAX); let b = _mm256_set1_epi16(1); let r = _mm256_maskz_packs_epi16(0, a, b); @@ -17863,7 +18021,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_packs_epi16() { + const fn test_mm_mask_packs_epi16() { let a = _mm_set1_epi16(i16::MAX); let b = _mm_set1_epi16(1 << 8 | 1); let r = _mm_mask_packs_epi16(a, 0, a, b); @@ -17875,7 +18033,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_packs_epi16() { + const fn test_mm_maskz_packs_epi16() { let a = _mm_set1_epi16(i16::MAX); let b = _mm_set1_epi16(1); let r = _mm_maskz_packs_epi16(0, a, b); @@ -17887,7 +18045,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_packus_epi32() { + const fn test_mm512_packus_epi32() { let a = _mm512_set1_epi32(-1); let b = _mm512_set1_epi32(1); let r = _mm512_packus_epi32(a, b); @@ -17898,7 +18056,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_packus_epi32() { + const fn test_mm512_mask_packus_epi32() { let a = _mm512_set1_epi32(-1); let b = _mm512_set1_epi32(1 << 16 | 1); let r = _mm512_mask_packus_epi32(a, 0, a, b); @@ -17911,7 +18069,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_packus_epi32() { + const fn test_mm512_maskz_packus_epi32() { let a = _mm512_set1_epi32(-1); let b = _mm512_set1_epi32(1); let r = _mm512_maskz_packus_epi32(0, a, b); @@ -17924,7 +18082,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_packus_epi32() { + const fn test_mm256_mask_packus_epi32() { let a = _mm256_set1_epi32(-1); let b = _mm256_set1_epi32(1 << 16 | 1); let r = _mm256_mask_packus_epi32(a, 0, a, b); @@ -17935,7 +18093,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_packus_epi32() { + const fn test_mm256_maskz_packus_epi32() { let a = _mm256_set1_epi32(-1); let b = _mm256_set1_epi32(1); let r = _mm256_maskz_packus_epi32(0, a, b); @@ -17946,7 +18104,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_packus_epi32() { + const fn test_mm_mask_packus_epi32() { let a = _mm_set1_epi32(-1); let b = _mm_set1_epi32(1 << 16 | 1); let r = _mm_mask_packus_epi32(a, 0, a, b); @@ -17957,7 +18115,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_packus_epi32() { + const fn test_mm_maskz_packus_epi32() { let a = _mm_set1_epi32(-1); let b = _mm_set1_epi32(1); let r = _mm_maskz_packus_epi32(0, a, b); @@ -17968,7 +18126,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_packus_epi16() { + const fn test_mm512_packus_epi16() { let a = _mm512_set1_epi16(-1); let b = _mm512_set1_epi16(1); let r = _mm512_packus_epi16(a, b); @@ -17981,7 +18139,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_mask_packus_epi16() { + const fn test_mm512_mask_packus_epi16() { let a = _mm512_set1_epi16(-1); let b = _mm512_set1_epi16(1 << 8 | 1); let r = _mm512_mask_packus_epi16(a, 0, a, b); @@ -18001,7 +18159,7 @@ mod tests { } #[simd_test(enable = "avx512bw")] - fn test_mm512_maskz_packus_epi16() { + const fn test_mm512_maskz_packus_epi16() { let a = _mm512_set1_epi16(-1); let b = _mm512_set1_epi16(1); let r = _mm512_maskz_packus_epi16(0, a, b); @@ -18020,7 +18178,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_mask_packus_epi16() { + const fn test_mm256_mask_packus_epi16() { let a = _mm256_set1_epi16(-1); let b = _mm256_set1_epi16(1 << 8 | 1); let r = _mm256_mask_packus_epi16(a, 0, a, b); @@ -18033,7 +18191,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm256_maskz_packus_epi16() { + const fn test_mm256_maskz_packus_epi16() { let a = _mm256_set1_epi16(-1); let b = _mm256_set1_epi16(1); let r = _mm256_maskz_packus_epi16(0, a, b); @@ -18046,7 +18204,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_mask_packus_epi16() { + const fn test_mm_mask_packus_epi16() { let a = _mm_set1_epi16(-1); let b = _mm_set1_epi16(1 << 8 | 1); let r = _mm_mask_packus_epi16(a, 0, a, b); @@ -18057,7 +18215,7 @@ mod tests { } #[simd_test(enable = "avx512bw,avx512vl")] - fn test_mm_maskz_packus_epi16() { + const fn test_mm_maskz_packus_epi16() { let a = _mm_set1_epi16(-1); let b = _mm_set1_epi16(1); let r = _mm_maskz_packus_epi16(0, a, b); diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 2c4439a3f3a55..3f7781cc7dc4c 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -2816,14 +2816,32 @@ mod tests { let aa = &[3.0f32, 12.0, 23.0, NAN]; let bb = &[3.0f32, 47.5, 1.5, NAN]; - let ee = &[1i32, 0, 1, 0]; + let ee = &[0i32, 0, 1, 0]; for i in 0..4 { let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); - let r = _mm_comige_ss(a, b); + let r = _mm_comigt_ss(a, b); + assert_eq!( + ee[i], r, + "_mm_comigt_ss({:?}, {:?}) = {}, expected: {} (i={})", + a, b, r, ee[i], i + ); + } + } + + #[simd_test(enable = "sse")] + fn test_mm_comige_ss() { + let aa = &[3.0f32, 23.0, 12.0, NAN]; + let bb = &[3.0f32, 1.5, 47.5, NAN]; + let ee = &[1i32, 1, 0, 0]; + + for i in 0..4 { + let a = _mm_setr_ps(aa[i], 1.0, 2.0, 3.0); + let b = _mm_setr_ps(bb[i], 0.0, 2.0, 4.0); + let r = _mm_comige_ss(a, b); assert_eq!( ee[i], r, "_mm_comige_ss({:?}, {:?}) = {}, expected: {} (i={})", diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index f339a003df4d1..1f97f3c69d0e3 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -1484,7 +1484,7 @@ pub const fn _mm_move_epi64(a: __m128i) -> __m128i { } } -/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers /// using signed saturation. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi16) @@ -1492,11 +1492,29 @@ pub const fn _mm_move_epi64(a: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packsswb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(packsswb(a.as_i16x8(), b.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = simd_splat(i8::MAX as i16); + let min = simd_splat(i8::MIN as i16); + + let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min) + .as_m128i() + .as_i8x16(); + let clamped_b = simd_imax(simd_imin(b.as_i16x8(), max), min) + .as_m128i() + .as_i8x16(); + + // Shuffle the low i8 of each i16 from two concatenated vectors into + // the low bits of the result register. + const IDXS: [u32; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]; + let result: i8x16 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m128i() + } } -/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers /// using signed saturation. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32) @@ -1504,11 +1522,25 @@ pub fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packssdw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(packssdw(a.as_i32x4(), b.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = simd_splat(i16::MAX as i32); + let min = simd_splat(i16::MIN as i32); + + let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min); + let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min); + + let clamped_a: i16x4 = simd_cast(clamped_a); + let clamped_b: i16x4 = simd_cast(clamped_b); + + let a: i64 = transmute(clamped_a); + let b: i64 = transmute(clamped_b); + i64x2::new(a, b).as_m128i() + } } -/// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers +/// Converts packed signed 16-bit integers from `a` and `b` to packed 8-bit integers /// using unsigned saturation. /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16) @@ -1516,8 +1548,28 @@ pub fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse2")] #[cfg_attr(test, assert_instr(packuswb))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(packuswb(a.as_i16x8(), b.as_i16x8())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = simd_splat(u8::MAX as i16); + let min = simd_splat(u8::MIN as i16); + + let clamped_a = simd_imax(simd_imin(a.as_i16x8(), max), min) + .as_m128i() + .as_i8x16(); + let clamped_b = simd_imax(simd_imin(b.as_i16x8(), max), min) + .as_m128i() + .as_i8x16(); + + // Shuffle the low bytes of each i16 from two concatenated vectors into + // the low bits of the result register. + // Without `simd_shuffle`, this intrinsic will cause the AVX-512BW + // `_mm_mask_packus_epi16` and `_mm_maskz_packus_epi16` tests to fail. + const IDXS: [u32; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]; + let result: i8x16 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m128i() + } } /// Returns the `imm8` element of `a`. @@ -3217,12 +3269,6 @@ unsafe extern "C" { fn cvtps2dq(a: __m128) -> i32x4; #[link_name = "llvm.x86.sse2.maskmov.dqu"] fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); - #[link_name = "llvm.x86.sse2.packsswb.128"] - fn packsswb(a: i16x8, b: i16x8) -> i8x16; - #[link_name = "llvm.x86.sse2.packssdw.128"] - fn packssdw(a: i32x4, b: i32x4) -> i16x8; - #[link_name = "llvm.x86.sse2.packuswb.128"] - fn packuswb(a: i16x8, b: i16x8) -> u8x16; #[link_name = "llvm.x86.sse2.max.sd"] fn maxsd(a: __m128d, b: __m128d) -> __m128d; #[link_name = "llvm.x86.sse2.max.pd"] @@ -4286,7 +4332,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_packs_epi16() { + const fn test_mm_packs_epi16() { let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); let r = _mm_packs_epi16(a, b); @@ -4300,7 +4346,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_packs_epi32() { + const fn test_mm_packs_epi32() { let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); let r = _mm_packs_epi32(a, b); @@ -4311,7 +4357,7 @@ mod tests { } #[simd_test(enable = "sse2")] - fn test_mm_packus_epi16() { + const fn test_mm_packus_epi16() { let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); let r = _mm_packus_epi16(a, b); diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs index 7ad4306f36f21..4ebf7d3bd39a8 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs @@ -418,7 +418,7 @@ pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { unsafe { simd_imin(a.as_u32x4(), b.as_u32x4()).as_m128i() } } -/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers +/// Converts packed signed 32-bit integers from `a` and `b` to packed 16-bit integers /// using unsigned saturation /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi32) @@ -426,8 +426,26 @@ pub const fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { #[target_feature(enable = "sse4.1")] #[cfg_attr(test, assert_instr(packusdw))] #[stable(feature = "simd_x86", since = "1.27.0")] -pub fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { - unsafe { transmute(packusdw(a.as_i32x4(), b.as_i32x4())) } +#[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")] +pub const fn _mm_packus_epi32(a: __m128i, b: __m128i) -> __m128i { + unsafe { + let max = simd_splat(u16::MAX as i32); + let min = simd_splat(u16::MIN as i32); + + let clamped_a = simd_imax(simd_imin(a.as_i32x4(), max), min) + .as_m128i() + .as_i16x8(); + let clamped_b = simd_imax(simd_imin(b.as_i32x4(), max), min) + .as_m128i() + .as_i16x8(); + + // Shuffle the low u16 of each i32 from two concatenated vectors into + // the low bits of the result register. + const IDXS: [u32; 8] = [0, 2, 4, 6, 8, 10, 12, 14]; + let result: i16x8 = simd_shuffle!(clamped_a, clamped_b, IDXS); + + result.as_m128i() + } } /// Compares packed 64-bit integers in `a` and `b` for equality @@ -1166,8 +1184,6 @@ pub unsafe fn _mm_stream_load_si128(mem_addr: *const __m128i) -> __m128i { unsafe extern "C" { #[link_name = "llvm.x86.sse41.insertps"] fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128; - #[link_name = "llvm.x86.sse41.packusdw"] - fn packusdw(a: i32x4, b: i32x4) -> u16x8; #[link_name = "llvm.x86.sse41.dppd"] fn dppd(a: __m128d, b: __m128d, imm8: u8) -> __m128d; #[link_name = "llvm.x86.sse41.dpps"] @@ -1455,7 +1471,7 @@ mod tests { } #[simd_test(enable = "sse4.1")] - fn test_mm_packus_epi32() { + const fn test_mm_packus_epi32() { let a = _mm_setr_epi32(1, 2, 3, 4); let b = _mm_setr_epi32(-1, -2, -3, -4); let r = _mm_packus_epi32(a, b); diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml index 9190c8518a667..8574aacee6671 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml @@ -4567,20 +4567,11 @@ intrinsics: unsafe: [neon] attr: - *neon-stable - assert_instr: [st1] + assert_instr: [stp] types: - - ['f64', float64x1x2_t, float64x1_t] + - ['f64', float64x1x2_t] compose: - - LLVMLink: - name: 'st2.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" @@ -4591,19 +4582,10 @@ intrinsics: - *neon-stable assert_instr: [st2] types: - - [i64, int64x2x2_t, int64x2_t] - - [f64, float64x2x2_t, float64x2_t] + - [i64, int64x2x2_t, "2"] + - [f64, float64x2x2_t, "2"] compose: - - LLVMLink: - name: 'st2.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true] - name: "vst2{neon_type[1].lane_nox}" doc: "Store multiple 2-element structures from two registers" @@ -4781,19 +4763,9 @@ intrinsics: safety: unsafe: [neon] types: - - [f64, float64x1x3_t, float64x1_t] + - [f64, float64x1x3_t] compose: - - LLVMLink: - name: 'st3.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst3{neon_type[1].lane_nox}" doc: "Store multiple 3-element structures from three registers" @@ -4860,20 +4832,10 @@ intrinsics: safety: unsafe: [neon] types: - - [i64, int64x2x3_t, int64x2_t] - - [f64, float64x2x3_t, float64x2_t] + - [i64, int64x2x3_t, "2"] + - [f64, float64x2x3_t, "2"] compose: - - LLVMLink: - name: 'st3.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true] - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" @@ -4995,20 +4957,9 @@ intrinsics: safety: unsafe: [neon] types: - - [f64, float64x1x4_t, float64x1_t] + - [f64, float64x1x4_t] compose: - - LLVMLink: - name: 'st4.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst4{neon_type[1].lane_nox}" doc: "Store multiple 4-element structures from four registers" @@ -5075,21 +5026,10 @@ intrinsics: safety: unsafe: [neon] types: - - [i64, int64x2x4_t, int64x2_t] - - [f64, float64x2x4_t, float64x2_t] + - [i64, int64x2x4_t, "2"] + - [f64, float64x2x4_t, "2"] compose: - - LLVMLink: - name: 'st4.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true] - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" @@ -5434,7 +5374,7 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ["q_lane_f64", float64x2_t, float64x1_t, "q_f64", '[LANE as u32, LANE as u32]'] + - ["q_lane_f64", float64x2_t, float64x1_t, "q_f64", '[LANE as u32; 2]'] compose: - FnCall: [static_assert!, ['LANE == 0']] - FnCall: @@ -5503,11 +5443,11 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32, LANE as u32]'] - - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32, LANE as u32]'] - - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32, LANE as u32]'] + - ['_lane_f32', float32x2_t, float32x2_t, '1', '_f32', '[LANE as u32; 2]'] + - ['_laneq_f32', float32x2_t, float32x4_t, '2', '_f32', '[LANE as u32; 2]'] + - ['q_lane_f32', float32x4_t, float32x2_t, '1', 'q_f32', '[LANE as u32; 4]'] + - ['q_laneq_f32', float32x4_t, float32x4_t, '2', 'q_f32', '[LANE as u32; 4]'] + - ['q_laneq_f64', float64x2_t, float64x2_t, '1', 'q_f64', '[LANE as u32; 2]'] compose: - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] - FnCall: @@ -5533,10 +5473,10 @@ intrinsics: static_defs: ["const LANE: i32"] safety: safe types: - - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - ['_lane_f16', float16x4_t, float16x4_t, '2', '_f16', '[LANE as u32; 4]'] + - ['_laneq_f16', float16x4_t, float16x8_t, '3', '_f16', '[LANE as u32; 4]'] + - ['q_lane_f16', float16x8_t, float16x4_t, '2', 'q_f16', '[LANE as u32; 8]'] + - ['q_laneq_f16', float16x8_t, float16x8_t, '3', 'q_f16', '[LANE as u32; 8]'] compose: - FnCall: [static_assert_uimm_bits!, ['LANE', "{type[3]}"]] - FnCall: @@ -7815,14 +7755,14 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]'] + - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]'] + - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] + - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] + - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]'] + - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]'] + - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] + - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}] @@ -7899,14 +7839,14 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32, LANE as u32]'] - - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32, LANE as u32]'] - - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [_lane_s16, int16x4_t, int16x4_t, int16x4_t, '2', '[LANE as u32; 4]'] + - [_laneq_s16, int16x4_t, int16x4_t, int16x8_t, '3', '[LANE as u32; 4]'] + - [q_lane_s16, int16x8_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] + - [q_laneq_s16, int16x8_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] + - [_lane_s32, int32x2_t, int32x2_t, int32x2_t, '1', '[LANE as u32; 2]'] + - [_laneq_s32, int32x2_t, int32x2_t, int32x4_t, '2', '[LANE as u32; 2]'] + - [q_lane_s32, int32x4_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] + - [q_laneq_s32, int32x4_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[4]}']] - Let: [c, "{type[1]}", {FnCall: [simd_shuffle!, [c, c, "{type[5]}"]]}] @@ -11198,7 +11138,7 @@ intrinsics: - FnCall: - simd_mul - - a - - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32, LANE as u32]']] + - FnCall: ["simd_shuffle!", [b, b, '[LANE as u32; 2]']] - name: "vmuld_lane_f64" doc: "Floating-point multiply" @@ -11255,7 +11195,7 @@ intrinsics: - FnCall: - simd_mul - - a - - FnCall: [simd_shuffle!, [b, b, '[LANE as u32, LANE as u32]']] + - FnCall: [simd_shuffle!, [b, b, '[LANE as u32; 2]']] # vmulq_laneq_f16 @@ -11272,8 +11212,8 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [float16x4_t, float16x8_t, '_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32]"] - - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]"] + - [float16x4_t, float16x8_t, '_lane', "[LANE as u32; 4]"] + - [float16x8_t, float16x8_t, 'q_lane', "[LANE as u32; 8]"] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '3']] - FnCall: @@ -11395,10 +11335,10 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] + - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] + - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] + - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: @@ -11418,10 +11358,10 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: @@ -11720,10 +11660,10 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int16x8_t, int16x4_t, int32x4_t, '2', '[LANE as u32; 8]'] + - [int16x8_t, int16x8_t, int32x4_t, '3', '[LANE as u32; 8]'] + - [int32x4_t, int32x2_t, int64x2_t, '1', '[LANE as u32; 4]'] + - [int32x4_t, int32x4_t, int64x2_t, '2', '[LANE as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: @@ -11742,10 +11682,10 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint16x8_t, uint16x4_t, uint32x4_t, '2', '[LANE as u32; 8]'] + - [uint16x8_t, uint16x8_t, uint32x4_t, '3', '[LANE as u32; 8]'] + - [uint32x4_t, uint32x2_t, uint64x2_t, '1', '[LANE as u32; 4]'] + - [uint32x4_t, uint32x4_t, uint64x2_t, '2', '[LANE as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, "{type[3]}"]] - FnCall: @@ -12033,10 +11973,10 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [int32x4_t, int16x8_t, int16x4_t, '2', '[LANE as u32; 8]'] + - [int32x4_t, int16x8_t, int16x8_t, '3', '[LANE as u32; 8]'] + - [int64x2_t, int32x4_t, int32x2_t, '1', '[LANE as u32; 4]'] + - [int64x2_t, int32x4_t, int32x4_t, '2', '[LANE as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]] @@ -12052,10 +11992,10 @@ intrinsics: static_defs: ['const LANE: i32'] safety: safe types: - - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] - - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32, LANE as u32, LANE as u32, LANE as u32]'] + - [uint32x4_t, uint16x8_t, uint16x4_t, '2', '[LANE as u32; 8]'] + - [uint32x4_t, uint16x8_t, uint16x8_t, '3', '[LANE as u32; 8]'] + - [uint64x2_t, uint32x4_t, uint32x2_t, '1', '[LANE as u32; 4]'] + - [uint64x2_t, uint32x4_t, uint32x4_t, '2', '[LANE as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [LANE, '{type[3]}']] - FnCall: ['vmlal_high_{neon_type[2]}', [a, b, {FnCall: [simd_shuffle!, [c, c, '{type[4]}']]}]] diff --git a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml index 8e10fff984ac7..5104ae607ccff 100644 --- a/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml +++ b/library/stdarch/crates/stdarch-gen-arm/spec/neon/arm_shared.spec.yml @@ -1439,12 +1439,12 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_lane_u8, uint8x8_t, uint8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [q_lane_u8, uint8x8_t, uint8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_lane_p8, poly8x8_t, poly8x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [q_lane_p8, poly8x8_t, poly8x16_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_s8, int8x8_t, int8x8_t, '3', '[N as u32; 8]'] + - [q_lane_s8, int8x8_t, int8x16_t, '3', '[N as u32; 16]'] + - [_lane_u8, uint8x8_t, uint8x8_t, '3', '[N as u32; 8]'] + - [q_lane_u8, uint8x8_t, uint8x16_t, '3', '[N as u32; 16]'] + - [_lane_p8, poly8x8_t, poly8x8_t, '3', '[N as u32; 8]'] + - [q_lane_p8, poly8x8_t, poly8x16_t, '3', '[N as u32; 16]'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] @@ -1463,12 +1463,12 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [q_laneq_u8, uint8x16_t, uint8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_laneq_u8, uint8x16_t, uint8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [q_laneq_p8, poly8x16_t, poly8x16_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_laneq_p8, poly8x16_t, poly8x8_t, '4', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_s8, int8x16_t, int8x16_t, '4', '[N as u32; 16]'] + - [_laneq_s8, int8x16_t, int8x8_t, '4', '[N as u32; 8]'] + - [q_laneq_u8, uint8x16_t, uint8x16_t, '4', '[N as u32; 16]'] + - [_laneq_u8, uint8x16_t, uint8x8_t, '4', '[N as u32; 8]'] + - [q_laneq_p8, poly8x16_t, poly8x16_t, '4', '[N as u32; 16]'] + - [_laneq_p8, poly8x16_t, poly8x8_t, '4', '[N as u32; 8]'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] @@ -1487,12 +1487,12 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] - - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] - - [q_lane_u16, uint16x4_t, uint16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_lane_p16, poly16x4_t, poly16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] - - [q_lane_p16, poly16x4_t, poly16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_s16, int16x4_t, int16x4_t, '2', '[N as u32; 4]'] + - [q_lane_s16, int16x4_t, int16x8_t, '2', '[N as u32; 8]'] + - [_lane_u16, uint16x4_t, uint16x4_t, '2', '[N as u32; 4]'] + - [q_lane_u16, uint16x4_t, uint16x8_t, '2', '[N as u32; 8]'] + - [_lane_p16, poly16x4_t, poly16x4_t, '2', '[N as u32; 4]'] + - [q_lane_p16, poly16x4_t, poly16x8_t, '2', '[N as u32; 8]'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] @@ -1511,12 +1511,12 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] - - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_laneq_u16, uint16x8_t, uint16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] - - [q_laneq_p16, poly16x8_t, poly16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_laneq_p16, poly16x8_t, poly16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_s16, int16x8_t, int16x8_t, '3', '[N as u32; 8]'] + - [_laneq_s16, int16x8_t, int16x4_t, '3', '[N as u32; 4]'] + - [q_laneq_u16, uint16x8_t, uint16x8_t, '3', '[N as u32; 8]'] + - [_laneq_u16, uint16x8_t, uint16x4_t, '3', '[N as u32; 4]'] + - [q_laneq_p16, poly16x8_t, poly16x8_t, '3', '[N as u32; 8]'] + - [_laneq_p16, poly16x8_t, poly16x4_t, '3', '[N as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] @@ -1538,8 +1538,8 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] - - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32, N as u32, N as u32, N as u32]'] + - [q_laneq_f16, float16x8_t, float16x8_t, '3', '[N as u32; 8]'] + - [_laneq_f16, float16x8_t, float16x4_t, '3', '[N as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] @@ -1578,8 +1578,8 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32, N as u32, N as u32, N as u32]'] - - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]'] + - [_lane_f16, float16x4_t, float16x4_t, '2', '[N as u32; 4]'] + - [q_lane_f16, float16x4_t, float16x8_t, '2', '[N as u32; 8]'] compose: - FnCall: [static_assert_uimm_bits!, [N, "{type[3]}"]] - FnCall: [simd_shuffle!, [a, a, "{type[4]}"]] @@ -5049,17 +5049,7 @@ intrinsics: types: - [i64, int64x1x2_t, int64x1_t] compose: - - LLVMLink: - name: 'vst2.{neon_type[1]}' - arguments: - - 'ptr: *mut i8' - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'size: i32' - links: - - link: 'llvm.arm.neon.vst2.v{neon_type[1].lane}{type[0]}.p0' - arch: arm - - FnCall: ['_vst2{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', '8']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" @@ -5092,16 +5082,7 @@ intrinsics: types: - [i64, int64x1x2_t, int64x1_t] compose: - - LLVMLink: - name: 'st2.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" @@ -5113,26 +5094,16 @@ intrinsics: safety: unsafe: [neon] types: - - [i8, int8x8x2_t, int8x8_t] - - [i16, int16x4x2_t, int16x4_t] - - [i32, int32x2x2_t, int32x2_t] - - [i8, int8x16x2_t, int8x16_t] - - [i16, int16x8x2_t, int16x8_t] - - [i32, int32x4x2_t, int32x4_t] - - [f32, float32x2x2_t, float32x2_t] - - [f32, float32x4x2_t, float32x4_t] + - [i8, int8x8x2_t, "8"] + - [i16, int16x4x2_t, "4"] + - [i32, int32x2x2_t, "2"] + - [i8, int8x16x2_t, "16"] + - [i16, int16x8x2_t, "8"] + - [i32, int32x4x2_t, "4"] + - [f32, float32x2x2_t, "2"] + - [f32, float32x4x2_t, "4"] compose: - - LLVMLink: - name: 'st2.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st2.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst2{neon_type[1].nox}', ['b.0', 'b.1', 'a as _']] - + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "2", a, b], [], true] - name: "vst2{neon_type[1].nox}" doc: "Store multiple 2-element structures from two registers" @@ -5426,17 +5397,7 @@ intrinsics: types: - [i64, int64x1x3_t, int64x1_t] compose: - - LLVMLink: - name: 'st3.{neon_type[1].nox}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st3.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst3{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" @@ -5471,18 +5432,7 @@ intrinsics: types: - [i64, int64x1x3_t, int64x1_t] compose: - - LLVMLink: - name: 'vst3.{neon_type[1]}' - arguments: - - 'ptr: *mut i8' - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'size: i32' - links: - - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}' - arch: arm - - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', '8']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst3{neon_type[1].nox}" doc: "Store multiple 3-element structures from three registers" @@ -5571,27 +5521,16 @@ intrinsics: safety: unsafe: [neon] types: - - [i8, int8x8x3_t, int8x8_t, '1'] - - [i16, int16x4x3_t, int16x4_t, '2'] - - [i32, int32x2x3_t, int32x2_t, '4'] - - [i8, int8x16x3_t, int8x16_t, '1'] - - [i16, int16x8x3_t, int16x8_t, '2'] - - [i32, int32x4x3_t, int32x4_t, '4'] - - [f32, float32x2x3_t, float32x2_t, '4'] - - [f32, float32x4x3_t, float32x4_t, '4'] + - [i8, int8x8x3_t, '8'] + - [i16, int16x4x3_t, '4'] + - [i32, int32x2x3_t, '2'] + - [i8, int8x16x3_t, '16'] + - [i16, int16x8x3_t, '8'] + - [i32, int32x4x3_t, '4'] + - [f32, float32x2x3_t, '2'] + - [f32, float32x4x3_t, '4'] compose: - - LLVMLink: - name: 'vst3.{neon_type[1]}' - arguments: - - 'ptr: *mut i8' - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'size: i32' - links: - - link: 'llvm.arm.neon.vst3.p0.v{neon_type[1].lane}{type[0]}' - arch: arm - - FnCall: ['_vst3{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', "{type[3]}"]] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "3", a, b], [], true] - name: "vst3{neon_type[1].nox}" @@ -5853,19 +5792,7 @@ intrinsics: types: - [i64, int64x1x4_t, int64x1_t] compose: - - LLVMLink: - name: 'vst4.{neon_type[1]}' - arguments: - - 'ptr: *mut i8' - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'size: i32' - links: - - link: 'llvm.arm.neon.vst4.p0.v{neon_type[1].lane}{type[0]}' - arch: arm - - FnCall: ['_vst4{neon_type[1].nox}', ['a as _', 'b.0', 'b.1', 'b.2', 'b.3', '8']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" @@ -5879,18 +5806,7 @@ intrinsics: types: - [i64, int64x1x4_t, int64x1_t] compose: - - LLVMLink: - name: 'vst4.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st4.{neon_type[2]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + - FnCall: [core::ptr::write_unaligned, ['a.cast()', b]] - name: "vst4{neon_type[1].nox}" doc: "Store multiple 4-element structures from four registers" @@ -6114,27 +6030,16 @@ intrinsics: safety: unsafe: [neon] types: - - [i8, int8x8x4_t, int8x8_t] - - [i16, int16x4x4_t, int16x4_t] - - [i32, int32x2x4_t, int32x2_t] - - [i8, int8x16x4_t, int8x16_t] - - [i16, int16x8x4_t, int16x8_t] - - [i32, int32x4x4_t, int32x4_t] - - [f32, float32x2x4_t, float32x2_t] - - [f32, float32x4x4_t, float32x4_t] + - [i8, int8x8x4_t, "8"] + - [i16, int16x4x4_t, "4"] + - [i32, int32x2x4_t, "2"] + - [i8, int8x16x4_t, "16"] + - [i16, int16x8x4_t, "8"] + - [i32, int32x4x4_t, "4"] + - [f32, float32x2x4_t, "2"] + - [f32, float32x4x4_t, "4"] compose: - - LLVMLink: - name: 'vst4.{neon_type[1]}' - arguments: - - 'a: {type[2]}' - - 'b: {type[2]}' - - 'c: {type[2]}' - - 'd: {type[2]}' - - 'ptr: *mut i8' - links: - - link: 'llvm.aarch64.neon.st4.v{neon_type[1].lane}{type[0]}.p0' - arch: aarch64,arm64ec - - FnCall: ['_vst4{neon_type[1].nox}', ['b.0', 'b.1', 'b.2', 'b.3', 'a as _']] + - FnCall: ["crate::core_arch::macros::interleaving_store!", [{ Type: "{type[0]}" }, "{type[2]}", "4", a, b], [], true] - name: "vst4{neon_type[1].nox}" @@ -7675,7 +7580,7 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [int16x4_t, int16x4_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]'] + - [int16x4_t, int16x4_t, int32x4_t, '[N as u32; 4]'] compose: - FnCall: [static_assert_uimm_bits!, [N, '2']] - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] @@ -7695,7 +7600,7 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [int32x2_t, int32x2_t, int64x2_t, '[N as u32, N as u32]'] + - [int32x2_t, int32x2_t, int64x2_t, '[N as u32; 2]'] compose: - FnCall: [static_assert_uimm_bits!, [N, '1']] - Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}] @@ -8320,9 +8225,9 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }'] - - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }'] - - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64, -N as u64]) }'] + - [uint16x8_t, uint8x8_t, 'N >= 1 && N <= 8', 'const { uint16x8_t([-N as u16; 8]) }'] + - [uint32x4_t, uint16x4_t, 'N >= 1 && N <= 16', 'const { uint32x4_t([-N as u32; 4]) }'] + - [uint64x2_t, uint32x2_t, 'N >= 1 && N <= 32', 'const { uint64x2_t([-N as u64; 2]) }'] compose: - FnCall: [static_assert!, ["{type[2]}"]] - LLVMLink: @@ -10789,9 +10694,9 @@ intrinsics: static_defs: ['const N: i32'] safety: safe types: - - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16]) }'] - - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32, -N as u32, -N as u32, -N as u32]) }'] - - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64, -N as u64]) }'] + - [uint16x8_t, uint8x8_t, '8', 'const { uint16x8_t([-N as u16; 8]) }'] + - [uint32x4_t, uint16x4_t, '16', 'const { uint32x4_t([-N as u32; 4]) }'] + - [uint64x2_t, uint32x2_t, '32', 'const { uint64x2_t([-N as u64; 2]) }'] compose: - FnCall: [static_assert!, ['N >= 1 && N <= {type[2]}']] - LLVMLink: diff --git a/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs index 4136463f197fd..2ac05e28cb4ce 100644 --- a/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs +++ b/library/stdarch/crates/stdarch-verify/tests/x86-intel.rs @@ -246,7 +246,6 @@ fn verify_all_signatures() { "_xend", "_xabort_code", // Aliases - "_mm_comige_ss", "_mm_cvt_ss2si", "_mm_cvtt_ss2si", "_mm_cvt_si2ss", diff --git a/tests/codegen-llvm/riscv-abi/call-llvm-intrinsics.rs b/tests/codegen-llvm/riscv-abi/call-llvm-intrinsics.rs index e72a649a530a8..fb520d38df3ca 100644 --- a/tests/codegen-llvm/riscv-abi/call-llvm-intrinsics.rs +++ b/tests/codegen-llvm/riscv-abi/call-llvm-intrinsics.rs @@ -1,17 +1,20 @@ +//@ add-minicore //@ compile-flags: -C no-prepopulate-passes - -//@ only-riscv64 +//@ revisions: riscv32gc riscv64gc +//@ [riscv32gc] compile-flags: --target riscv32gc-unknown-linux-gnu +//@ [riscv32gc] needs-llvm-components: riscv +//@ [riscv64gc] compile-flags: --target riscv64gc-unknown-linux-gnu +//@ [riscv64gc] needs-llvm-components: riscv +//@ min-llvm-version: 21 #![feature(link_llvm_intrinsics)] +#![feature(no_core, lang_items)] +#![no_std] +#![no_core] #![crate_type = "lib"] -struct A; - -impl Drop for A { - fn drop(&mut self) { - println!("A"); - } -} +extern crate minicore; +use minicore::*; extern "C" { #[link_name = "llvm.sqrt.f32"] @@ -19,12 +22,9 @@ extern "C" { } pub fn do_call() { - let _a = A; - unsafe { // Ensure that we `call` LLVM intrinsics instead of trying to `invoke` them - // CHECK: store float 4.000000e+00, ptr %{{.}}, align 4 - // CHECK: call float @llvm.sqrt.f32(float %{{.}} + // CHECK: call float @llvm.sqrt.f32(float 4.000000e+00) sqrt(4.0); } }