HEnquist · HEnquist · Mar 28, 2022 · Mar 21, 2022 · Mar 21, 2022 · Mar 21, 2022
diff --git a/.github/workflows/ci_test.yml b/.github/workflows/ci_test.yml
@@ -10,11 +10,11 @@ jobs:
       - name: Checkout sources
         uses: actions/checkout@v2
 
-      - name: Install stable toolchain
+      - name: Install nightly toolchain
         uses: actions-rs/toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
+          toolchain: nightly
           override: true
 
       - name: Run cargo check
@@ -35,11 +35,11 @@ jobs:
       - name: Checkout sources
         uses: actions/checkout@v2
 
-      - name: Install stable toolchain
+      - name: Install nightly toolchain
         uses: actions-rs/toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
+          toolchain: nightly
           override: true
 
       - name: Run cargo test
@@ -54,11 +54,11 @@ jobs:
       - name: Checkout sources
         uses: actions/checkout@v2
 
-      - name: Install stable toolchain
+      - name: Install nightly toolchain
         uses: actions-rs/toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
+          toolchain: nightly
           override: true
           components: rustfmt, clippy
 
@@ -96,39 +96,25 @@ jobs:
           use-cross: true
           args: --target aarch64-unknown-linux-gnu
 
-      - name: Run cargo check with neon
-        uses: actions-rs/cargo@v1
-        with:
-          command: check
-          use-cross: true
-          args: --target aarch64-unknown-linux-gnu --features neon
-
       - name: Run cargo test for arm
         uses: actions-rs/cargo@v1
         with:
           command: test
           use-cross: true
           args: --target aarch64-unknown-linux-gnu
 
-      - name: Run cargo test for arm with neon
-        uses: actions-rs/cargo@v1
-        with:
-          command: test
-          use-cross: true
-          args: --target aarch64-unknown-linux-gnu --features neon
-
   check_test_wasm32:
     name: Check and test wasm32
     runs-on: ubuntu-latest
     steps:
       - name: Checkout sources
         uses: actions/checkout@v2
 
-      - name: Install stable toolchain
+      - name: Install nightly toolchain
         uses: actions-rs/toolchain@v1
         with:
           profile: minimal
-          toolchain: stable
+          toolchain: nightly
           target: wasm32-unknown-unknown
           override: true
 

diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,7 @@
 [package]
 name = "rubato"
-version = "0.11.0"
+version = "0.12.0"
+rust-version = "1.61"
 authors = ["HEnquist <henrik.enquist@gmail.com>"]
 description = "Asynchronous resampling library intended for audio data"
 license = "MIT"
@@ -12,11 +13,6 @@ edition = "2018"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
-[features]
-default = ["avx"]
-avx = []
-neon = []
-
 [dependencies]
 log = { version = "0.4.14", optional = true }
 realfft = "3.0.0"

diff --git a/README.md b/README.md
@@ -42,36 +42,20 @@ This type of resampler is considerably faster but doesn't support changing the r
 
 ##### Asynchronous resampling
 
-The asynchronous resampler is designed to benefit from auto-vectorization, meaning that the Rust compiler
-can recognize calculations that can be done in parallel. It will then use SIMD instructions for those.
-This works quite well, but there is still room for improvement.
-To address this, it also has optimized SIMD support.
-This gets enabled at runtime by checking the SIMD support of the CPU.
+The asynchronous resampler supports SIMD on x86_64 and on aarch64.
+The SIMD capabilities of the CPU are determined at runtime.
+If no supported SIMD instruction set is available, it falls back to a scalar implementation.
 
-On x86_64 it will try to use SSE3. The speed benefit compared to auto-vectorization
-depends on the CPU, but tends to be in the range 20-30% for 64-bit data, and 50-100% for 32-bit data.
-There is also optional support for AVX on x86_64, and Neon on aarch64 via Cargo features.
+On x86_64 it will try to use AVX. If AVX isn't available, it will instead try SSE3.
+
+On aarch64 (64-bit Arm) it will use Neon if available.
 
 ##### Synchronous resampling
 
 The synchronous resamplers benefit from the SIMD support of the RustFFT library.
 
 #### Cargo features
 
-###### `avx`: AVX on x86_64
-
-The `avx` feature is enabled by default, and enables the use of AVX when it's available.
-The speed increase compared to SSE depends on the CPU, and tends to range from zero to 50%.
-On other architectures than x86_64 the `avx` feature does nothing.
-
-###### `neon`: Experimental Neon support on aarch64
-
-Experimental support for Neon is available for aarch64 (64-bit Arm) by enabling the `neon` feature.
-This requires the use of a nightly compiler, as the Neon support in Rust is still experimental.
-On a Raspberry Pi 4, this gives a boost of about 10% for 64-bit floats and 50% for 32-bit floats when
-compared to the auto-vectorized implementation.
-Note that this only works on a full 64-bit operating system.
-
 ###### `log`: Enable logging
 
 This feature enables logging via the `log` crate. This is intended for debugging purposes.
@@ -106,10 +90,12 @@ let waves_out = resampler.process(&waves_in, None).unwrap();
 
 ### Compatibility
 
-The `rubato` crate requires rustc version 1.40 or newer.
+The `rubato` crate requires rustc version 1.61 or newer.
 
 ### Changelog
 
+- v0.12.0
+  - Always enable all SIMD acceleration (and remove the SIMD Cargo features).
 - v0.11.0
   - New api to allow use in realtime applications.
   - Configurable adjust range of asynchronous resamplers.

diff --git a/benches/resamplers.rs b/benches/resamplers.rs
@@ -1,11 +1,11 @@
-use criterion::{criterion_group, criterion_main, Criterion};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
 extern crate rubato;
 
 use rubato::ScalarInterpolator;
 
-#[cfg(all(target_arch = "x86_64", feature = "avx"))]
+#[cfg(target_arch = "x86_64")]
 use rubato::interpolator_avx::AvxInterpolator;
-#[cfg(all(target_arch = "aarch64", feature = "neon"))]
+#[cfg(target_arch = "aarch64")]
 use rubato::interpolator_neon::NeonInterpolator;
 #[cfg(target_arch = "x86_64")]
 use rubato::interpolator_sse::SseInterpolator;
@@ -14,19 +14,19 @@ use rubato::{FftFixedIn, InterpolationType, Resampler, SincFixedIn, WindowFuncti
 
 fn bench_fftfixedin(c: &mut Criterion) {
     let chunksize = 1024;
-    let mut resampler = FftFixedIn::<f64>::new(44100, 192000, 1024, 2, 1);
+    let mut resampler = FftFixedIn::<f64>::new(44100, 192000, 1024, 2, 1).unwrap();
     let waveform = vec![vec![0.0 as f64; chunksize]; 1];
     c.bench_function("FftFixedIn f64", |b| {
-        b.iter(|| resampler.process(&waveform).unwrap())
+        b.iter(|| resampler.process(black_box(&waveform), None).unwrap())
     });
 }
 
 fn bench_fftfixedin_32(c: &mut Criterion) {
     let chunksize = 1024;
-    let mut resampler = FftFixedIn::<f32>::new(44100, 192000, 1024, 2, 1);
+    let mut resampler = FftFixedIn::<f32>::new(44100, 192000, 1024, 2, 1).unwrap();
     let waveform = vec![vec![0.0 as f32; chunksize]; 1];
     c.bench_function("FftFixedIn f32", |b| {
-        b.iter(|| resampler.process(&waveform).unwrap())
+        b.iter(|| resampler.process(black_box(&waveform), None).unwrap())
     });
 }
 
@@ -61,13 +61,14 @@ macro_rules! bench_async_resampler {
             let interpolator = Box::new(interpolator);
             let mut resampler = SincFixedIn::<$ft>::new_with_interpolator(
                 resample_ratio,
+                1.1,
                 interpolation_type,
                 interpolator,
                 chunksize,
                 1,
-            );
+            ).unwrap();
             let waveform = vec![vec![0.0 as $ft; chunksize]; 1];
-            c.bench_function($desc, |b| b.iter(|| resampler.process(&waveform).unwrap()));
+            c.bench_function($desc, |b| b.iter(|| resampler.process(black_box(&waveform), None).unwrap()));
         }
     };
 }
@@ -170,47 +171,47 @@ bench_async_resampler!(
     "sse async nearest 64"
 );
 
-#[cfg(all(target_arch = "x86_64", feature = "avx"))]
+#[cfg(target_arch = "x86_64")]
 bench_async_resampler!(
     f32,
     AvxInterpolator,
     InterpolationType::Cubic,
     bench_avx_async_cubic_32,
     "avx async cubic   32"
 );
-#[cfg(all(target_arch = "x86_64", feature = "avx"))]
+#[cfg(target_arch = "x86_64")]
 bench_async_resampler!(
     f32,
     AvxInterpolator,
     InterpolationType::Linear,
     bench_avx_async_linear_32,
     "avx async linear  32"
 );
-#[cfg(all(target_arch = "x86_64", feature = "avx"))]
+#[cfg(target_arch = "x86_64")]
 bench_async_resampler!(
     f32,
     AvxInterpolator,
     InterpolationType::Nearest,
     bench_avx_async_nearest_32,
     "avx async nearest 32"
 );
-#[cfg(all(target_arch = "x86_64", feature = "avx"))]
+#[cfg(target_arch = "x86_64")]
 bench_async_resampler!(
     f64,
     AvxInterpolator,
     InterpolationType::Cubic,
     bench_avx_async_cubic_64,
     "avx async cubic   64"
 );
-#[cfg(all(target_arch = "x86_64", feature = "avx"))]
+#[cfg(target_arch = "x86_64")]
 bench_async_resampler!(
     f64,
     AvxInterpolator,
     InterpolationType::Linear,
     bench_avx_async_linear_64,
     "avx async linear  64"
 );
-#[cfg(all(target_arch = "x86_64", feature = "avx"))]
+#[cfg(target_arch = "x86_64")]
 bench_async_resampler!(
     f64,
     AvxInterpolator,
@@ -219,47 +220,47 @@ bench_async_resampler!(
     "avx async nearest 64"
 );
 
-#[cfg(all(target_arch = "aarch64", feature = "neon"))]
+#[cfg(target_arch = "aarch64")]
 bench_async_resampler!(
     f32,
     NeonInterpolator,
     InterpolationType::Cubic,
     bench_neon_async_cubic_32,
     "neon async cubic   32"
 );
-#[cfg(all(target_arch = "aarch64", feature = "neon"))]
+#[cfg(target_arch = "aarch64")]
 bench_async_resampler!(
     f32,
     NeonInterpolator,
     InterpolationType::Linear,
     bench_neon_async_linear_32,
     "neon async linear  32"
 );
-#[cfg(all(target_arch = "aarch64", feature = "neon"))]
+#[cfg(target_arch = "aarch64")]
 bench_async_resampler!(
     f32,
     NeonInterpolator,
     InterpolationType::Nearest,
     bench_neon_async_nearest_32,
     "neon async nearest 32"
 );
-#[cfg(all(target_arch = "aarch64", feature = "neon"))]
+#[cfg(target_arch = "aarch64")]
 bench_async_resampler!(
     f64,
     NeonInterpolator,
     InterpolationType::Cubic,
     bench_neon_async_cubic_64,
     "neon async cubic   64"
 );
-#[cfg(all(target_arch = "aarch64", feature = "neon"))]
+#[cfg(target_arch = "aarch64")]
 bench_async_resampler!(
     f64,
     NeonInterpolator,
     InterpolationType::Linear,
     bench_neon_async_linear_64,
     "neon async linear  64"
 );
-#[cfg(all(target_arch = "aarch64", feature = "neon"))]
+#[cfg(target_arch = "aarch64")]
 bench_async_resampler!(
     f64,
     NeonInterpolator,
@@ -268,26 +269,7 @@ bench_async_resampler!(
     "neon async nearest 64"
 );
 
-#[cfg(all(target_arch = "x86_64", not(feature = "avx")))]
-criterion_group!(
-    benches,
-    bench_fftfixedin,
-    bench_fftfixedin_32,
-    bench_scalar_async_cubic_32,
-    bench_scalar_async_linear_32,
-    bench_scalar_async_nearest_32,
-    bench_scalar_async_cubic_64,
-    bench_scalar_async_linear_64,
-    bench_scalar_async_nearest_64,
-    bench_sse_async_cubic_32,
-    bench_sse_async_linear_32,
-    bench_sse_async_nearest_32,
-    bench_sse_async_cubic_64,
-    bench_sse_async_linear_64,
-    bench_sse_async_nearest_64,
-);
-
-#[cfg(all(target_arch = "x86_64", feature = "avx"))]
+#[cfg(target_arch = "x86_64")]
 criterion_group!(
     benches,
     bench_fftfixedin,
@@ -312,20 +294,7 @@ criterion_group!(
     bench_avx_async_nearest_64,
 );
 
-#[cfg(all(target_arch = "aarch64", not(feature = "neon")))]
-criterion_group!(
-    benches,
-    bench_fftfixedin,
-    bench_fftfixedin_32,
-    bench_scalar_async_cubic_32,
-    bench_scalar_async_linear_32,
-    bench_scalar_async_nearest_32,
-    bench_scalar_async_cubic_64,
-    bench_scalar_async_linear_64,
-    bench_scalar_async_nearest_64,
-);
-
-#[cfg(all(target_arch = "aarch64", feature = "neon"))]
+#[cfg(target_arch = "aarch64")]
 criterion_group!(
     benches,
     bench_fftfixedin,