diff --git a/.github/workflows/ci_test.yml b/.github/workflows/ci_test.yml index 8af7691..401c299 100644 --- a/.github/workflows/ci_test.yml +++ b/.github/workflows/ci_test.yml @@ -10,11 +10,11 @@ jobs: - name: Checkout sources uses: actions/checkout@v2 - - name: Install stable toolchain + - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable + toolchain: nightly override: true - name: Run cargo check @@ -35,11 +35,11 @@ jobs: - name: Checkout sources uses: actions/checkout@v2 - - name: Install stable toolchain + - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable + toolchain: nightly override: true - name: Run cargo test @@ -54,11 +54,11 @@ jobs: - name: Checkout sources uses: actions/checkout@v2 - - name: Install stable toolchain + - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable + toolchain: nightly override: true components: rustfmt, clippy @@ -96,13 +96,6 @@ jobs: use-cross: true args: --target aarch64-unknown-linux-gnu - - name: Run cargo check with neon - uses: actions-rs/cargo@v1 - with: - command: check - use-cross: true - args: --target aarch64-unknown-linux-gnu --features neon - - name: Run cargo test for arm uses: actions-rs/cargo@v1 with: @@ -110,13 +103,6 @@ jobs: use-cross: true args: --target aarch64-unknown-linux-gnu - - name: Run cargo test for arm with neon - uses: actions-rs/cargo@v1 - with: - command: test - use-cross: true - args: --target aarch64-unknown-linux-gnu --features neon - check_test_wasm32: name: Check and test wasm32 runs-on: ubuntu-latest @@ -124,11 +110,11 @@ jobs: - name: Checkout sources uses: actions/checkout@v2 - - name: Install stable toolchain + - name: Install nightly toolchain uses: actions-rs/toolchain@v1 with: profile: minimal - toolchain: stable + toolchain: nightly target: wasm32-unknown-unknown override: true diff --git a/Cargo.toml b/Cargo.toml index 
b0466f6..6d9e88d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [package] name = "rubato" -version = "0.11.0" +version = "0.12.0" +rust-version = "1.61" authors = ["HEnquist "] description = "Asynchronous resampling library intended for audio data" license = "MIT" @@ -12,11 +13,6 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html -[features] -default = ["avx"] -avx = [] -neon = [] - [dependencies] log = { version = "0.4.14", optional = true } realfft = "3.0.0" diff --git a/README.md b/README.md index c548dde..bf062e1 100644 --- a/README.md +++ b/README.md @@ -42,15 +42,13 @@ This type of resampler is considerably faster but doesn't support changing the r ##### Asynchronous resampling -The asynchronous resampler is designed to benefit from auto-vectorization, meaning that the Rust compiler -can recognize calculations that can be done in parallel. It will then use SIMD instructions for those. -This works quite well, but there is still room for improvement. -To address this, it also has optimized SIMD support. -This gets enabled at runtime by checking the SIMD support of the CPU. +The asynchronous resampler supports SIMD on x86_64 and on aarch64. +The SIMD capabilities of the CPU are determined at runtime. +If no supported SIMD instruction set is available, it falls back to a scalar implementation. -On x86_64 it will try to use SSE3. The speed benefit compared to auto-vectorization -depends on the CPU, but tends to be in the range 20-30% for 64-bit data, and 50-100% for 32-bit data. -There is also optional support for AVX on x86_64, and Neon on aarch64 via Cargo features. +On x86_64 it will try to use AVX. If AVX isn't available, it will instead try SSE3. + +On aarch64 (64-bit Arm) it will use Neon if available. ##### Synchronous resampling @@ -58,20 +56,6 @@ The synchronous resamplers benefit from the SIMD support of the RustFFT library. 
#### Cargo features -###### `avx`: AVX on x86_64 - -The `avx` feature is enabled by default, and enables the use of AVX when it's available. -The speed increase compared to SSE depends on the CPU, and tends to range from zero to 50%. -On other architectures than x86_64 the `avx` feature does nothing. - -###### `neon`: Experimental Neon support on aarch64 - -Experimental support for Neon is available for aarch64 (64-bit Arm) by enabling the `neon` feature. -This requires the use of a nightly compiler, as the Neon support in Rust is still experimental. -On a Raspberry Pi 4, this gives a boost of about 10% for 64-bit floats and 50% for 32-bit floats when -compared to the auto-vectorized implementation. -Note that this only works on a full 64-bit operating system. - ###### `log`: Enable logging This feature enables logging via the `log` crate. This is intended for debugging purposes. @@ -106,10 +90,12 @@ let waves_out = resampler.process(&waves_in, None).unwrap(); ### Compatibility -The `rubato` crate requires rustc version 1.40 or newer. +The `rubato` crate requires rustc version 1.61 or newer. ### Changelog +- v0.12.0 + - Always enable all SIMD acceleration (and remove the SIMD Cargo features). - v0.11.0 - New api to allow use in realtime applications. - Configurable adjust range of asynchronous resamplers.
diff --git a/benches/resamplers.rs b/benches/resamplers.rs index 27623a3..1098803 100644 --- a/benches/resamplers.rs +++ b/benches/resamplers.rs @@ -1,11 +1,11 @@ -use criterion::{criterion_group, criterion_main, Criterion}; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; extern crate rubato; use rubato::ScalarInterpolator; -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] use rubato::interpolator_avx::AvxInterpolator; -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] use rubato::interpolator_neon::NeonInterpolator; #[cfg(target_arch = "x86_64")] use rubato::interpolator_sse::SseInterpolator; @@ -14,19 +14,19 @@ use rubato::{FftFixedIn, InterpolationType, Resampler, SincFixedIn, WindowFuncti fn bench_fftfixedin(c: &mut Criterion) { let chunksize = 1024; - let mut resampler = FftFixedIn::<f64>::new(44100, 192000, 1024, 2, 1); + let mut resampler = FftFixedIn::<f64>::new(44100, 192000, 1024, 2, 1).unwrap(); let waveform = vec![vec![0.0 as f64; chunksize]; 1]; c.bench_function("FftFixedIn f64", |b| { - b.iter(|| resampler.process(&waveform).unwrap()) + b.iter(|| resampler.process(black_box(&waveform), None).unwrap()) }); } fn bench_fftfixedin_32(c: &mut Criterion) { let chunksize = 1024; - let mut resampler = FftFixedIn::<f32>::new(44100, 192000, 1024, 2, 1); + let mut resampler = FftFixedIn::<f32>::new(44100, 192000, 1024, 2, 1).unwrap(); let waveform = vec![vec![0.0 as f32; chunksize]; 1]; c.bench_function("FftFixedIn f32", |b| { - b.iter(|| resampler.process(&waveform).unwrap()) + b.iter(|| resampler.process(black_box(&waveform), None).unwrap()) }); } @@ -61,13 +61,14 @@ macro_rules!
bench_async_resampler { let interpolator = Box::new(interpolator); let mut resampler = SincFixedIn::<$ft>::new_with_interpolator( resample_ratio, + 1.1, interpolation_type, interpolator, chunksize, 1, - ); + ).unwrap(); let waveform = vec![vec![0.0 as $ft; chunksize]; 1]; - c.bench_function($desc, |b| b.iter(|| resampler.process(&waveform).unwrap())); + c.bench_function($desc, |b| b.iter(|| resampler.process(black_box(&waveform), None).unwrap())); } }; } @@ -170,7 +171,7 @@ bench_async_resampler!( "sse async nearest 64" ); -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] bench_async_resampler!( f32, AvxInterpolator, @@ -178,7 +179,7 @@ bench_async_resampler!( bench_avx_async_cubic_32, "avx async cubic 32" ); -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] bench_async_resampler!( f32, AvxInterpolator, @@ -186,7 +187,7 @@ bench_async_resampler!( bench_avx_async_linear_32, "avx async linear 32" ); -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] bench_async_resampler!( f32, AvxInterpolator, @@ -194,7 +195,7 @@ bench_async_resampler!( bench_avx_async_nearest_32, "avx async nearest 32" ); -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] bench_async_resampler!( f64, AvxInterpolator, @@ -202,7 +203,7 @@ bench_async_resampler!( bench_avx_async_cubic_64, "avx async cubic 64" ); -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] bench_async_resampler!( f64, AvxInterpolator, @@ -210,7 +211,7 @@ bench_async_resampler!( bench_avx_async_linear_64, "avx async linear 64" ); -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] bench_async_resampler!( f64, AvxInterpolator, @@ -219,7 +220,7 @@ bench_async_resampler!( "avx async nearest 64" ); -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] bench_async_resampler!( f32, 
NeonInterpolator, @@ -227,7 +228,7 @@ bench_async_resampler!( bench_neon_async_cubic_32, "neon async cubic 32" ); -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] bench_async_resampler!( f32, NeonInterpolator, @@ -235,7 +236,7 @@ bench_async_resampler!( bench_neon_async_linear_32, "neon async linear 32" ); -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] bench_async_resampler!( f32, NeonInterpolator, @@ -243,7 +244,7 @@ bench_async_resampler!( bench_neon_async_nearest_32, "neon async nearest 32" ); -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] bench_async_resampler!( f64, NeonInterpolator, @@ -251,7 +252,7 @@ bench_async_resampler!( bench_neon_async_cubic_64, "neon async cubic 64" ); -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] bench_async_resampler!( f64, NeonInterpolator, @@ -259,7 +260,7 @@ bench_async_resampler!( bench_neon_async_linear_64, "neon async linear 64" ); -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] bench_async_resampler!( f64, NeonInterpolator, @@ -268,26 +269,7 @@ bench_async_resampler!( "neon async nearest 64" ); -#[cfg(all(target_arch = "x86_64", not(feature = "avx")))] -criterion_group!( - benches, - bench_fftfixedin, - bench_fftfixedin_32, - bench_scalar_async_cubic_32, - bench_scalar_async_linear_32, - bench_scalar_async_nearest_32, - bench_scalar_async_cubic_64, - bench_scalar_async_linear_64, - bench_scalar_async_nearest_64, - bench_sse_async_cubic_32, - bench_sse_async_linear_32, - bench_sse_async_nearest_32, - bench_sse_async_cubic_64, - bench_sse_async_linear_64, - bench_sse_async_nearest_64, -); - -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] criterion_group!( benches, bench_fftfixedin, @@ -312,20 +294,7 @@ criterion_group!( bench_avx_async_nearest_64, ); -#[cfg(all(target_arch = "aarch64", 
not(feature = "neon")))] -criterion_group!( - benches, - bench_fftfixedin, - bench_fftfixedin_32, - bench_scalar_async_cubic_32, - bench_scalar_async_linear_32, - bench_scalar_async_nearest_32, - bench_scalar_async_cubic_64, - bench_scalar_async_linear_64, - bench_scalar_async_nearest_64, -); - -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] criterion_group!( benches, bench_fftfixedin, diff --git a/src/asynchro.rs b/src/asynchro.rs index 100ef97..323cdd4 100644 --- a/src/asynchro.rs +++ b/src/asynchro.rs @@ -1,8 +1,8 @@ use crate::error::{ResampleError, ResampleResult, ResamplerConstructionError}; use crate::interpolation::*; -#[cfg(all(target_arch = "x86_64", feature = "avx"))] +#[cfg(target_arch = "x86_64")] use crate::interpolator_avx::AvxInterpolator; -#[cfg(all(target_arch = "aarch64", feature = "neon"))] +#[cfg(target_arch = "aarch64")] use crate::interpolator_neon::NeonInterpolator; #[cfg(target_arch = "x86_64")] use crate::interpolator_sse::SseInterpolator; @@ -187,7 +187,7 @@ where f_cutoff * resample_ratio as f32 }; - #[cfg(all(target_arch = "x86_64", feature = "avx"))] + #[cfg(target_arch = "x86_64")] if let Ok(interpolator) = AvxInterpolator::::new(sinc_len, oversampling_factor, f_cutoff, window) { @@ -201,7 +201,7 @@ where return Box::new(interpolator); } - #[cfg(all(target_arch = "aarch64", feature = "neon"))] + #[cfg(target_arch = "aarch64")] if let Ok(interpolator) = NeonInterpolator::::new(sinc_len, oversampling_factor, f_cutoff, window) { diff --git a/src/error.rs b/src/error.rs index 8968bbd..0343003 100644 --- a/src/error.rs +++ b/src/error.rs @@ -14,7 +14,7 @@ pub enum CpuFeature { #[cfg(target_arch = "x86_64")] Fma, /// aarc64 neon cpu feature. 
- #[cfg(all(feature = "neon", target_arch = "aarch64"))] + #[cfg(target_arch = "aarch64")] Neon, } @@ -34,7 +34,7 @@ impl CpuFeature { CpuFeature::Fma => { is_x86_feature_detected!("fma") } - #[cfg(all(feature = "neon", target_arch = "aarch64"))] + #[cfg(target_arch = "aarch64")] CpuFeature::Neon => { std::arch::is_aarch64_feature_detected!("neon") } @@ -58,7 +58,7 @@ impl fmt::Display for CpuFeature { CpuFeature::Fma => { write!(f, "fma") } - #[cfg(all(feature = "neon", target_arch = "aarch64"))] + #[cfg(target_arch = "aarch64")] CpuFeature::Neon => { write!(f, "neon") } diff --git a/src/lib.rs b/src/lib.rs index 719d3cc..5fb9896 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -40,15 +40,13 @@ //! //! #### Asynchronous resampling //! -//! The asynchronous resampler is designed to benefit from auto-vectorization, meaning that the Rust compiler -//! can recognize calculations that can be done in parallel. It will then use SIMD instructions for those. -//! This works quite well, but there is still room for improvement. -//! To address this, it also has optimized SIMD support. -//! This gets enabled at runtime by checking the SIMD support of the CPU. +//! The asynchronous resampler supports SIMD on x86_64 and on aarch64. +//! The SIMD capabilities of the CPU are determined at runtime. +//! If no supported SIMD instruction set is available, it falls back to a scalar implementation. //! -//! On x86_64 it will try to use SSE3. The speed benefit compared to auto-vectorization -//! depends on the CPU, but tends to be in the range 20-30% for 64-bit data, and 50-100% for 32-bit data. -//! There is also optional support for AVX on x86_64, and Neon on aarch64 via Cargo features. +//! On x86_64 it will try to use AVX. If AVX isn't available, it will instead try SSE3. +//! +//! On aarch64 (64-bit Arm) it will use Neon if available. //! //! #### Synchronous resampling //! @@ -56,20 +54,6 @@ //! //! ### Cargo features //! -//! ##### `avx`: AVX on x86_64 -//! -//! 
The `avx` feature is enabled by default, and enables the use of AVX when it's available. -//! The speed increase compared to SSE depends on the CPU, and tends to range from zero to 50%. -//! On other architectures than x86_64 the `avx` feature does nothing. -//! -//! ##### `neon`: Experimental Neon support on aarch64 -//! -//! Experimental support for Neon is available for aarch64 (64-bit Arm) by enabling the `neon` feature. -//! This requires the use of a nightly compiler, as the Neon support in Rust is still experimental. -//! On a Raspberry Pi 4, this gives a boost of about 10% for 64-bit floats and 50% for 32-bit floats when -//! compared to the auto-vectorized implementation. -//! Note that this only works on a full 64-bit operating system. -//! //! ##### `log`: Enable logging //! //! This feature enables logging via the `log` crate. This is intended for debugging purposes. @@ -104,10 +88,12 @@ //! //! ## Compatibility //! -//! The `rubato` crate requires rustc version 1.40 or newer. +//! The `rubato` crate requires rustc version 1.61 or newer. //! //! ## Changelog //! +//! - v0.12.0 +//! - Always enable all SIMD acceleration (and remove the SIMD Cargo features). //! - v0.11.0 //! - New api to allow use in realtime applications. //! - Configurable adjust range of asynchronous resamplers. @@ -118,9 +104,6 @@ //! - v0.9.0 //! - Accept any AsRef<[T]> as input. -#![cfg_attr(feature = "neon", feature(aarch64_target_feature))] -#![cfg_attr(feature = "neon", feature(stdsimd))] - #[cfg(feature = "log")] extern crate log; @@ -199,7 +182,7 @@ macro_rules! interpolator { } interpolator! { - #[cfg(all(target_arch = "x86_64", feature = "avx"))] + #[cfg(target_arch = "x86_64")] mod interpolator_avx; trait AvxSample; } @@ -211,7 +194,7 @@ interpolator! { } interpolator! { - #[cfg(all(target_arch = "aarch64", feature = "neon"))] + #[cfg(target_arch = "aarch64")] mod interpolator_neon; trait NeonSample; }