From 624f9e44e68a45e861f911e39a6e5ff89d767488 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 3 Jul 2026 19:14:18 +0000 Subject: [PATCH 1/3] Add docs/PERCEPTUAL-VERIFICATION.md: measurement + listening gates for xtc~/room~ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Answers the ROADMAP Wave 3 open question ('define the protocol before building them'): deterministic numeric gates (XTC rejection/robustness/ coloration/stability; room ER exactness, T20/EDT/C50/C80, per-order energy, tail IACC) that run as CI-able offline computations, a small formal listening protocol with per-question pass criteria, and the bypass rule — a perceptual module that does not beat bypass in listening does not ship. Flags the KEMAR-vs-per-listener plant and FDN-vs-convolution tail decisions. Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_012VeadvCRUHJdneFNwRbFAM --- docs/PERCEPTUAL-VERIFICATION.md | 238 ++++++++++++++++++++++++++++++++ 1 file changed, 238 insertions(+) create mode 100644 docs/PERCEPTUAL-VERIFICATION.md diff --git a/docs/PERCEPTUAL-VERIFICATION.md b/docs/PERCEPTUAL-VERIFICATION.md new file mode 100644 index 0000000..1bb3525 --- /dev/null +++ b/docs/PERCEPTUAL-VERIFICATION.md @@ -0,0 +1,238 @@ +# Perceptual verification — `ambitap.xtc~` and `ambitap.room~` + +Measurement + listening protocol for the two Wave-3 perceptual objects +(ROADMAP items 3–4). This document is a **prerequisite**: per the roadmap, +neither object gets built until this protocol exists, and neither ships +until it passes. + +The premise, restated from the roadmap: these modules are +*listening-dependent*. The rest of the library is verifiable to ~1e-7 +against closed-form references; these two are not. A crosstalk canceller +with wrong regularization colors the sound and smears transients; a bad +early-reflection pattern destroys localization instead of adding +externalization. In both cases **the failure mode sounds worse than doing +nothing** — so "the math ran without asserting" proves nothing, and the +correctness bar has two layers: + +1. **Numeric gates** — objective metrics computed offline from the designed + filters / rendered impulse responses. Deterministic, thresholded, run in + CI exactly like the existing quality gates. +2. **A listening pass** — small, scripted, repeatable, recorded. A release + checklist item, not CI. + +**The bypass rule** (non-negotiable, from the roadmap framing): each module +is compared against BYPASS in the listening pass — plain stereo playback +for `xtc~`, anechoic encode→binaural for `room~`. If listeners do not +prefer the module over bypass on its headline question, **it does not +ship**, regardless of how green the numeric gates are. + +Every threshold below is an **initial engineering target**: set from the +literature and first principles, expected to be revised (and the revision +recorded here) after the first real measurement run. Thresholds are +targets; the *metric definitions* and the bypass rule are the contract. + +**Harness convention.** Same shape as the existing verification layer: an +executed notebook per module (`notebooks/xtc_verification.ipynb`, +`notebooks/room_verification.ipynb`) driving the real implementation +through the C ABI (`tools/capi/`, loaded via `notebooks/ambitap_py.py`), +with assert cells enforcing every numeric gate. Fixed seeds, fixed +geometry, byte-stable outputs — the notebooks are the plots+numbers +artifact; the asserts are mirrored as plain C++ tests +(`tests/test_xtc_metrics.cpp`, `tests/test_room_metrics.cpp`) so the gate +runs in the ordinary ctest suite without a Python leg. One capi addition is +needed up front: export the **time-domain** SH-reconstructed HRIR at a +direction (the inner loop of `binaural_renderer::probe_response`, which +currently returns magnitude only) — the XTC plant model and the room +binauralization both need the complex response. + +--- + +## `ambitap.xtc~` — transaural / crosstalk cancellation + +### Plant model and what is being measured + +The object designs a 2×2 filter matrix **H**(f) for a stated geometry +(speaker span ±θ, distance d). The plant **C**(f) is the 2×2 matrix of +head-related transfer functions from each speaker to each ear — +reconstructed from the built-in KEMAR SH set at (±θ, 0) elevation, at the +design distance. Everything below is computed from the **performance +matrix P(f) = C(f)·H(f)** — no audio, no room, no listener needed, fully +deterministic. (An in-room *measured* plant is explicitly out of scope for +v1; see open questions.) + +### Numeric gates + +| # | Metric | Definition | Gate (initial target) | +|---|---|---|---| +| X1 | Crosstalk rejection spectrum | XTC(f) = 20·log10(\|P_ii\| / \|P_ij\|), worse ear | ≥ 20 dB mean and ≥ 15 dB minimum over **300 Hz – 6 kHz**, at spans ±10°, ±20°, ±30° | +| X2 | Robustness — translation | Recompute P with the head displaced; H fixed | ≥ 12 dB mean in-band XTC at ±2 cm lateral; XTC ≥ 0 dB (never worse than bypass) at ±5 cm | +| X3 | Robustness — rotation | Head yaw ±5°, ±10°; H fixed | ≥ 12 dB mean in-band at ±5°; ≥ 6 dB at ±10° | +| X4 | Coloration budget | \|P_ii(f)\|, 1/3-octave smoothed, re. its 300 Hz–6 kHz mean | within **±3 dB** over 200 Hz – 8 kHz at the design point; within ±6 dB under the X2/X3 offsets | +| X5 | Filter gain ceiling | max over f, over all four \|H_ij(f)\| | ≤ **+12 dB**; object applies matching static makeup attenuation so full-scale input cannot clip | +| X6 | Determinism | design twice at identical parameters | byte-identical filters | + +Rationale, per gate: + +- **X1 — why 300 Hz–6 kHz and not full band.** Below ~300 Hz the + inter-ear path difference is a tiny fraction of a wavelength: C is + nearly singular, and inverting it demands enormous filter gains for a + cue (low-frequency ILD) that barely exists perceptually. Above ~6 kHz, + cancellation depends on sub-centimeter head placement and on + individual-pinna detail the KEMAR plant cannot predict — "rejection" + there is fiction that evaporates the moment a real listener sits down. + The regularization is *supposed* to give up outside the band; a design + that claims full-band rejection is over-fit to the dummy head. 20 dB + in-band is the level Choueiri's BACCH work treats as perceptually + sufficient; 15 dB minimum keeps any single notch from collapsing the + image. +- **X2/X3 — the sweet spot must fail gracefully.** The gate is + two-sided: retain useful rejection at small offsets (a seated listener + breathes and shifts by centimeters), and *never* dip below 0 dB XTC or + blow the ±6 dB coloration budget at moderate offsets — an off-axis + listener should hear approximately ordinary stereo, not a comb filter. + The notebook additionally reports the contour of the ≥ 15 dB region + (sweet-spot width in cm/degrees) as an informational plot, not a gate. +- **X4 — the regularization vs coloration tradeoff, measured.** This is + the canonical failure Choueiri identified: frequency-independent + regularization of the 2×2 inverse produces dB-scale peaks/notches at + the ipsilateral ear. ±3 dB (1/3-octave) is the budget; if the design + cannot meet X1 and X4 simultaneously, the regularization must become + frequency-dependent (BACCH-style) — the gates force that decision + early rather than after someone hears it. +- **X5 — stability/headroom.** The gain ceiling is what the + regularization parameter buys; +12 dB is the budget for both dynamic + range loss and speaker stress. The ceiling, not the rejection, is what + the regularization knob trades away — report both together. + +### Listening pass + +Setup: two loudspeakers at a measured design span (start with ±15°, +desktop geometry), listening position marked, program loudness-matched +(±0.5 LU) between conditions. Minimum **3 listeners**, at least one not +involved in development. Stimuli: dry speech (one male, one female), +percussive transients (claps, rim shots), broadband sustained (pink-noise +bursts, string pad), plus one binaural scene rendered with +`ambitap.binaural~` containing hard-lateral (±90°) and rear sources. + +| Question | Method | Pass | +|---|---|---| +| Image widening / lateralization | A/B vs bypass, binaural scene: report perceived azimuth of the ±90° sources | ≥ 80% of trials place them clearly outside the speaker span, each listener | +| Front/back | rear source in the scene, A/B vs bypass: "behind or in front?" | majority "behind" with xtc~ on, and strictly better than bypass's score | +| Coloration | ABX, mono frontal speech, xtc~ vs bypass, then preference | audible difference is expected (ABX will pass); *preference* must not favor bypass, per listener over ≥ 10 trials | +| Transient integrity | percussive stimulus: any pre-echo, chirp, ringing? | zero artifact reports | +| Graceful failure | listener deliberately moves ±10 cm / turns head | image collapses toward ordinary stereo; no comb/chirp artifacts reported | + +Ship rule: the widening question is the headline — if `xtc~` does not +beat bypass there, or loses the coloration preference, it does not ship. + +--- + +## `ambitap.room~` — SH-domain early reflections + spatial reverb + +### What is being measured + +The module has two parts with different verifiability. The **image-source +early reflections** for a shoebox are exactly computable — arrival times, +directions, and levels have closed-form ground truth, so they get +tight tolerances. The **tail** (FDN or convolution in the SH domain) is +statistical — it gets ISO 3382-style energy metrics plus SH- and +interaural-domain diffuseness checks. All metrics are computed from a +rendered SH impulse response (impulse in → SH IR out, fixed seed) and its +binauralization through `dsp::binaural_renderer`; the ISO 3382 machinery +(Schroeder integration, octave bands) lives in the notebook. + +### Numeric gates + +| # | Metric | Definition | Gate (initial target) | +|---|---|---|---| +| R1 | ER arrival times | first ~20 image-source arrivals in the SH IR vs analytic shoebox times | within **±1 sample** at 48 kHz | +| R2 | ER directions | `analysis::energy_vector` DOA on a window around each arrival | within **5°** of the analytic image direction (order ≥ 3 render) | +| R3 | ER levels | per-arrival energy vs 1/r · Π(wall reflection coefficients) | within **±0.5 dB** | +| R4 | Decay time | Schroeder T20 per octave band, 250 Hz – 4 kHz, on the omni (W) channel | within **±10%** of the parameterized RT60, every band | +| R5 | EDT | early decay time (0…−10 dB), same bands | within **±25%** of parameterized RT60; no double-slope unless parameterized | +| R6 | Clarity | C50 and C80 from the rendered IR vs the analytic prediction of the direct + ER + exponential-tail parameterization | within **±2 dB**, and strictly monotone decreasing in source distance across a 3-distance sweep | +| R7 | SH order balance | tail (t > 80 ms) energy per SH order n, normalized per channel of that order | flat within **±1.5 dB** across orders — no order-dependent coloration of the diffuse field | +| R8 | Tail isotropy | \|rE\| (energy-vector magnitude) of the late tail, 20 ms windows, averaged | ≤ **0.1** | +| R9 | Interaural coherence | IACC of the binauralized tail (t > 80 ms), per octave band | broadband ≤ **0.3**; per-band within **0.15** of the KEMAR diffuse-field coherence curve above 500 Hz (below 500 Hz diffuse coherence is naturally high — track the reference, don't chase 0) | +| R10 | Determinism | fixed seed, two renders | byte-identical SH IR | + +Rationale: R1–R3 are the "checkable exactly" layer — if the image-source +model is wrong, no listening test is needed to reject it. R4/R5 verify +the tail *does what the parameter says* (a reverb whose T60 knob lies is +broken even if it sounds pleasant). R6 ties the direct/early/late balance +to the distance parameter — this is the objective shadow of the distance +listening question. R7 is the SH-specific trap: an FDN that feeds orders +unevenly (or a decoder-side max-rE weighting leaking into the tail) +produces a tail whose *timbre changes with playback order* — invisible in +any single stereo render, caught by construction here. R8/R9 are the +diffuseness of the tail in the two domains that matter: the SH field +itself, and at the ears — late IACC near the diffuse-field reference is +the strongest known objective correlate of externalization and perceived +envelopment. + +### Listening pass + +Headphone-based (the module's headline value is making binaural +externalize), binaural rendering via `ambitap.binaural~`, loudness-matched +(±0.5 LU). Minimum **3 listeners**, one non-developer. Stimuli: dry +speech, percussion, sustained broadband — all anechoic sources. + +| Question | Method | Pass | +|---|---|---| +| Externalization | A/B: encode→room~→binaural vs encode→binaural (BYPASS = anechoic) — "which is more outside your head?" | ≥ 80% of trials choose room~, each listener | +| Localization preserved | source at ±45°: does adding the room smear or drag the direct-sound image? | reported azimuth within ±15° of the dry condition | +| Distance | 3 parameterized distances, random order, rank them | correct ranking ≥ 80% of trials | +| Tail naturalness | rating 1–5 after A/B vs bypass and vs one reference convolution reverb IR; explicit artifact checklist (metallic ringing, flutter, chirping, pumping) | mean ≥ 3.5 and zero artifact reports on percussion (percussion is where FDN metallicity lives) | + +Ship rule: externalization is the headline — the roadmap's whole case for +`room~` is "room/reflections are what make binaural externalize". If it +does not beat anechoic bypass on that question, it does not ship. A tail +that externalizes but rings metallic on percussion also does not ship +(naturalness gate). + +--- + +## Gating — what runs where + +| Check | Kind | Runs | +|---|---|---| +| X1–X6, R1–R10 | numeric, deterministic | CI: `tests/test_xtc_metrics.cpp`, `tests/test_room_metrics.cpp` in the ordinary ctest suite; the executed notebooks re-run the same asserts with plots and are re-executed whenever the module DSP changes | +| Listening passes | human | Release checklist. Required before first ship, and re-run whenever the filter design / room model changes (not for wrapper-glue or packaging changes) | +| Threshold revisions | process | any threshold change lands as a diff to this file with one line of justification | + +Artifacts: the executed notebooks live in `notebooks/` (same convention as +the existing verification notebooks); the listening results are recorded +in the table below — date, commit, listener count, and the headline +numbers, so "it passed" is always attributable to a build. + +### Results log + +| Date | Commit | Module | Numeric gates | Listening (n, headline result) | Verdict | +|---|---|---|---|---|---| +| — | — | — | *no runs yet — modules not built* | — | — | + +--- + +## Open questions (decide before or during first measurement) + +1. **KEMAR plant vs real heads (xtc~).** The numeric gates use the KEMAR + plant; real listeners' HRTFs differ, so measured-on-KEMAR 20 dB will be + less on a human. The listening pass is the real gate for this; if the + first session shows the KEMAR-designed filters underperforming + audibly, the options are a structural (HRTF-free) canceller mode or + per-listener plant loading via the existing SOFA path. Decide after + the first session, not before. +2. **Free geometry vs presets (xtc~).** The protocol assumes a computed + plant from (span, distance). Supporting arbitrary/measured in-room + plants is a different verification problem (measurement error enters + the gate) — proposed: v1 ships computed-plant presets only. +3. **Tail architecture (room~).** FDN vs SH-domain convolution changes + which gates are tight vs statistical (R4–R9 are written to hold for + either). The R7/R9 gates are the ones most likely to force the + architecture choice — if an FDN can't hit diffuse-field IACC, the + answer is convolution or a hybrid, and it's cheaper to learn that from + the notebook than from listeners. +4. **Listener pool.** Three is the floor for a solo project; if a Wave-3 + release gathers external testers, raise the externalization and + widening gates from "each listener ≥ 80%" to a binomial-test criterion + (p < 0.05 vs chance) over the pooled trials. From a8f2e5ed99b66ef7f8e7fe1b40fc7b86d76bc847 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 3 Jul 2026 19:26:38 +0000 Subject: [PATCH 2/3] =?UTF-8?q?Add=20dsp::nfc=20=E2=80=94=20near-field=20c?= =?UTF-8?q?ompensation=20(NFC-HOA),=20per-order=20shelving?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fills the near-field gap flagged in docs/ROADMAP.md (Wave 3 item 2): per ambisonic order m the compensation is the ratio of spherical-wave radial terms H_m(s) = F_m(s, r_src)/F_m(s, r_ref), F_m built from the reverse Bessel polynomial; the m roots (Durand-Kerner in double, self-contained — no Eigen, no ) become analog zeros scaled by c/r_src and poles by c/r_ref, discretized per first/second-order section by bilinear transform (exact at DC: closed-form DC gain (r_ref/r_src)^m; bit-exact identity at r_src == r_ref). Distances clamp at 0.1 m (DC boost is unbounded as r_src -> 0). Applied per ACN channel by its order; W passes through. House lifecycle: ctor validates/allocates, prepare() snaps, setters rebuild coefficients in double on the control thread and publish through smoothed_table (distance changes move only numerator coefficients, so ramped interpolation stays inside the biquad stability triangle); process()/process_frame() are noexcept wait-free float32. Verified freestanding-compatible (-fno-exceptions profile compiles clean); not yet added to the embedded gate. Tests: identity, DC closed form (cut + boost, orders 1..10), clamping, impulse decay at aggressive distances, mid-ramp stability, and nfc added to the stateful allocation-guard RT-safety test. 121/121 green, also under AMBITAP_WERROR=ON; clang-format clean. Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_012VeadvCRUHJdneFNwRbFAM --- include/ambitap/ambitap.h | 1 + include/ambitap/dsp/nfc.h | 431 ++++++++++++++++++++++++++++++++++++++ tests/CMakeLists.txt | 1 + tests/test_nfc.cpp | 162 ++++++++++++++ tests/test_rt_safety.cpp | 8 + 5 files changed, 603 insertions(+) create mode 100644 include/ambitap/dsp/nfc.h create mode 100644 tests/test_nfc.cpp diff --git a/include/ambitap/ambitap.h b/include/ambitap/ambitap.h index ad4f4bf..2b69b35 100644 --- a/include/ambitap/ambitap.h +++ b/include/ambitap/ambitap.h @@ -15,6 +15,7 @@ #include "dsp/encoder.h" #include "dsp/format_converter.h" #include "dsp/mirror.h" +#include "dsp/nfc.h" #include "dsp/rotator.h" #include "dsp/spatial_compressor.h" #include "dsp/util/async_rebuilder.h" diff --git a/include/ambitap/dsp/nfc.h b/include/ambitap/dsp/nfc.h new file mode 100644 index 0000000..d3918b4 --- /dev/null +++ b/include/ambitap/dsp/nfc.h @@ -0,0 +1,431 @@ +/// AmbiTap: target-independent ambisonics library +/// Near-field compensation (NFC-HOA) — per-order bass shelving that corrects +/// the near-field effect of a spherical wave for HOA signals (Daniel's +/// formulation). +/// Timothy Place +/// Copyright 2026 Timothy Place. + +#ifndef AMBITAP_DSP_NFC_H +#define AMBITAP_DSP_NFC_H + +#include "../math/core/indexing.h" +#include "../math/core/validate.h" +#include "util/smoothing.h" + +#include +#include +#include +#include +#include + +namespace ambitap::dsp { + + /// Sections in the IIR cascade of a single order-m NFC filter: ceil(m/2) + /// (floor(m/2) biquads plus, for odd m, one first-order section). + constexpr size_t nfc_sections_for_order(int m) { + return static_cast(m + 1) / 2; + } + + /// Total cascade sections across the order-1..N filters of an order-N bus. + constexpr size_t nfc_total_sections(int order) { + size_t s = 0; + for (int m = 1; m <= order; ++m) s += nfc_sections_for_order(m); + return s; + } + + /// Near-field compensation filter for a higher-order ambisonics bus + /// (NFC-HOA, Jérôme Daniel's formulation). + /// + /// A point source at finite distance r is not a plane wave: its order-m + /// radial term carries the spherical-Hankel near-field factor, which in the + /// Laplace domain is the reverse Bessel polynomial ratio + /// + /// F_m(s, r) = theta_m(s r / c) / (s r / c)^m, + /// theta_m(x) = sum_{n=0..m} (m+n)! / ((m-n)! n! 2^n) x^(m-n). + /// + /// Encoding a source at distance r_src for reproduction by loudspeakers + /// (or a decoder reference) at distance r_ref requires, per order m, + /// + /// H_m(s) = F_m(s, r_src) / F_m(s, r_ref) + /// = (r_ref / r_src)^m + /// · theta_m(s·r_src/c) / theta_m(s·r_ref/c), + /// + /// a stable m-pole / m-zero low shelf: unity at high frequency and exactly + /// (r_ref / r_src)^m at DC — a bass boost for sources inside the reference + /// radius, a bass cut outside it, identity when r_src == r_ref. Applied per + /// ACN channel by its order (order 0 passes through unfiltered). + /// + /// Realization: the roots of theta_m (the classical Bessel-filter pole set, + /// all in the left half-plane) are found once per order at construction; + /// each conjugate pair — plus the one real root for odd m — becomes one + /// section whose analog zeros are the roots scaled by c/r_src and whose + /// poles are the same roots scaled by c/r_ref. Each section is discretized + /// with the bilinear transform s = 2·fs·(1 - z⁻¹)/(1 + z⁻¹) (no prewarping: + /// the mapping is exact at DC, where the shelf lives, and unity at Nyquist + /// by construction). + /// + /// Numerical care: the DC boost (r_ref/r_src)^m grows without bound as + /// r_src → 0, so both distances are clamped to k_min_distance (0.1 m). + /// Even so, small source distances at high orders are extreme by nature — + /// e.g. r_src = 0.1 m, r_ref = 1 m at order 5 is a +100 dB bass boost. + /// The audio path is float32 (the embedded contract): the shelf corners + /// sit far below fs, so coefficient quantization bounds the realized + /// low-frequency gain to within about 0.03 dB of the closed form at + /// 48 kHz and ordinary distances. + /// + /// Lifecycle: construct with the ambisonics order (validates, allocates, + /// finds the Bessel roots); the filter is immediately usable at the default + /// 48 kHz — call prepare() with the real sample rate before audio starts + /// (it snaps coefficients and clears state; control thread only). + /// + /// Threading: setters run on one control thread; they recompute the + /// coefficient table in double precision and publish it through a + /// smoothed_table, which the audio thread ramps in over + /// k_smoothing_samples. Ramping is always stable: set_source_distance() + /// moves only numerator coefficients, and the biquad stability region in + /// the (a1, a2) plane is convex, so a linear path between two stable + /// denominators stays stable. process() is wait-free (no allocation, + /// locks, or syscalls). Call snap_parameters() for offline/exact use. + class nfc { + public: + /// Minimum distance both r_src and r_ref are clamped to. The order-m + /// DC gain is (r_ref/r_src)^m, unbounded as r_src → 0; the clamp keeps + /// the filter finite and well-conditioned at every supported order. + static constexpr float k_min_distance = 0.1f; + + private: + /// [b0, b1, b2, a1, a2] per section. + static constexpr size_t k_section_coeffs = 5; + static constexpr size_t k_max_coeffs = k_section_coeffs * nfc_total_sections(max_order); + + int m_order; + size_t m_channels; + float m_fs {48000.0f}; + float m_source_distance {1.0f}; + float m_reference_distance {1.0f}; + float m_speed_of_sound {343.0f}; + + /// One root of theta_m per section: conjugate pairs stored as + /// (re, im > 0); the odd-order real root as (re, 0). + struct section_root { + double re; + double im; + }; + std::vector m_roots; // order-major, orders 1..m_order + size_t m_total_coeffs {0}; + + // Control-side snapshot and the audio-side smoothed table. + std::array m_coefficients {}; + smoothed_table m_smooth; + + // Audio-thread filter state: 2 floats per (channel, section), + // order-major then channel-major — the traversal order of process(). + std::vector m_state; + + public: + /// @param order Ambisonics order in [0, max_order]; channel count is + /// (order+1)^2. Order 0 is a valid degenerate passthrough. + /// @throws std::invalid_argument on out-of-range order. + explicit nfc(int order) + : m_order(validated_order(order, "dsp::nfc")) + , m_channels(channel_count(m_order)) { + compute_roots(); + m_total_coeffs = k_section_coeffs * nfc_total_sections(m_order); + m_state.assign(state_size(), 0.0f); + rebuild_coefficients(); + m_smooth.init(m_coefficients.data(), m_total_coeffs); + } + + int order() const { return m_order; } + size_t channels() const { return m_channels; } + + /// Set the sample rate: recomputes coefficients (snapped, no ramp) and + /// clears the filter state. Control thread; call before audio starts. + void prepare(float sample_rate) { + m_fs = sample_rate; + rebuild_coefficients(); + m_smooth.init(m_coefficients.data(), m_total_coeffs); + reset(); + } + + /// Source distance in meters, clamped to k_min_distance. Control thread. + void set_source_distance(float meters) { + m_source_distance = std::max(meters, k_min_distance); + rebuild_coefficients(); + publish(); + } + float source_distance() const { return m_source_distance; } + + /// Reference (loudspeaker / decoder) distance in meters, clamped to + /// k_min_distance. Control thread. + void set_reference_distance(float meters) { + m_reference_distance = std::max(meters, k_min_distance); + rebuild_coefficients(); + publish(); + } + float reference_distance() const { return m_reference_distance; } + + /// Speed of sound in m/s (clamped to >= 1). Control thread. + void set_speed_of_sound(float meters_per_second) { + m_speed_of_sound = std::max(meters_per_second, 1.0f); + rebuild_coefficients(); + publish(); + } + float speed_of_sound() const { return m_speed_of_sound; } + + /// Closed-form DC gain of the order-m filter: (r_ref / r_src)^m. + float dc_gain(int order_m) const { + return std::pow(m_reference_distance / m_source_distance, static_cast(order_m)); + } + + /// Skip the coefficient ramp: the audio thread jumps straight to the + /// latest targets on its next call. Offline rendering / tests. + void snap_parameters() { m_smooth.snap(); } + + /// Clear the filter state; keep coefficients and allocations. + void reset() { std::fill(m_state.begin(), m_state.end(), 0.0f); } + + /// Process one frame of channels() samples. Output may alias input. + /// Audio thread; wait-free. + void process_frame(const float* in, float* out) noexcept { + const float* co = m_smooth.tick(m_total_coeffs); + out[0] = in[0]; + size_t coeff_base = 0, state_off = 0; + for (int m = 1; m <= m_order; ++m) { + const size_t sections = nfc_sections_for_order(m); + const size_t first = static_cast(m) * static_cast(m); + const size_t last = channel_count(m); + for (size_t ch = first; ch < last; ++ch) { + float v = in[ch]; + for (size_t j = 0; j < sections; ++j) { + v = section_tick(v, co + (coeff_base + j) * k_section_coeffs, + m_state.data() + state_off); + state_off += 2; + } + out[ch] = v; + } + coeff_base += sections; + } + } + + /// Process a block of planar channel buffers. Output may alias input. + /// Audio thread; wait-free. + void process(const float* const* in, float* const* out, size_t frame_count) noexcept { + if (m_smooth.settled()) { + // Fast path: constant coefficients, channel-major blocks. + const float* co = m_smooth.tick(m_total_coeffs); + if (out[0] != in[0]) { + for (size_t i = 0; i < frame_count; ++i) out[0][i] = in[0][i]; + } + size_t coeff_base = 0, state_off = 0; + for (int m = 1; m <= m_order; ++m) { + const size_t sections = nfc_sections_for_order(m); + const size_t first = static_cast(m) * static_cast(m); + const size_t last = channel_count(m); + for (size_t ch = first; ch < last; ++ch) { + for (size_t j = 0; j < sections; ++j) { + section_block(j == 0 ? in[ch] : out[ch], out[ch], frame_count, + co + (coeff_base + j) * k_section_coeffs, + m_state.data() + state_off); + state_off += 2; + } + } + coeff_base += sections; + } + return; + } + for (size_t i = 0; i < frame_count; ++i) { + const float* co = m_smooth.tick(m_total_coeffs); + out[0][i] = in[0][i]; + size_t coeff_base = 0, state_off = 0; + for (int m = 1; m <= m_order; ++m) { + const size_t sections = nfc_sections_for_order(m); + const size_t first = static_cast(m) * static_cast(m); + const size_t last = channel_count(m); + for (size_t ch = first; ch < last; ++ch) { + float v = in[ch][i]; + for (size_t j = 0; j < sections; ++j) { + v = section_tick(v, co + (coeff_base + j) * k_section_coeffs, + m_state.data() + state_off); + state_off += 2; + } + out[ch][i] = v; + } + coeff_base += sections; + } + } + } + + private: + /// One transposed-direct-form-II tick. First-order sections are stored + /// as biquads with b2 = a2 = 0, so one kernel serves both. + static float section_tick(float x, const float* c, float* s) noexcept { + const float y = c[0] * x + s[0]; + s[0] = c[1] * x - c[3] * y + s[1]; + s[1] = c[2] * x - c[4] * y; + return y; + } + + static void section_block(const float* x, float* y, size_t n, const float* c, + float* s) noexcept { + const float b0 = c[0], b1 = c[1], b2 = c[2], a1 = c[3], a2 = c[4]; + float s1 = s[0], s2 = s[1]; + for (size_t i = 0; i < n; ++i) { + const float xi = x[i]; + const float yi = b0 * xi + s1; + s1 = b1 * xi - a1 * yi + s2; + s2 = b2 * xi - a2 * yi; + y[i] = yi; + } + s[0] = s1; + s[1] = s2; + } + + size_t state_size() const { + size_t n = 0; + for (int m = 1; m <= m_order; ++m) { + n += static_cast(2 * m + 1) * nfc_sections_for_order(m) * 2; + } + return n; + } + + // ---- Control-side construction (double precision; never on the + // audio path) ------------------------------------------------- + + struct cplx { + double re, im; + }; + static cplx cadd(cplx a, cplx b) { return {a.re + b.re, a.im + b.im}; } + static cplx csub(cplx a, cplx b) { return {a.re - b.re, a.im - b.im}; } + static cplx cmul(cplx a, cplx b) { + return {a.re * b.re - a.im * b.im, a.re * b.im + a.im * b.re}; + } + static cplx cdiv(cplx a, cplx b) { + const double d = b.re * b.re + b.im * b.im; + return {(a.re * b.re + a.im * b.im) / d, (a.im * b.re - a.re * b.im) / d}; + } + static double cabs(cplx a) { return std::sqrt(a.re * a.re + a.im * a.im); } + + /// Find the m roots of the monic reverse Bessel polynomial theta_m by + /// Durand–Kerner iteration. theta_m's coefficients are exact integers + /// (at most (2m)!/(m! 2^m) ≈ 6.5e8 for m = max_order) and its roots + /// are simple and well separated, so the iteration converges quickly + /// from a generic circular start. Construction time only. + static void bessel_roots(int m, cplx* roots) { + constexpr double k_two_pi = 6.28318530717958647692; + + // coeff[n] multiplies x^(m-n): (m+n)! / ((m-n)! n! 2^n). + double coeff[max_order + 1]; + coeff[0] = 1.0; + for (int n = 0; n < m; ++n) { + coeff[n + 1] = coeff[n] * static_cast((m + n + 1) * (m - n)) + / (2.0 * static_cast(n + 1)); + } + const auto eval = [&](cplx z) { + cplx p {1.0, 0.0}; + for (int n = 1; n <= m; ++n) p = cadd(cmul(p, z), cplx {coeff[n], 0.0}); + return p; + }; + + // Root magnitudes grow roughly linearly with m; start on a circle + // of radius m, rotated off the axes so no guess is real. + for (int i = 0; i < m; ++i) { + const double a = k_two_pi * static_cast(i) / static_cast(m) + 0.5; + roots[i] = {static_cast(m) * std::cos(a), + static_cast(m) * std::sin(a)}; + } + for (int iter = 0; iter < 256; ++iter) { + double worst = 0.0; + for (int i = 0; i < m; ++i) { + cplx den {1.0, 0.0}; + for (int j = 0; j < m; ++j) { + if (j != i) den = cmul(den, csub(roots[i], roots[j])); + } + const cplx delta = cdiv(eval(roots[i]), den); + roots[i] = csub(roots[i], delta); + worst = std::max(worst, cabs(delta) / (1.0 + cabs(roots[i]))); + } + if (worst < 1e-14) break; + } + } + + /// Compute and store one section root set per order 1..m_order. + void compute_roots() { + m_roots.clear(); + m_roots.reserve(nfc_total_sections(m_order)); + for (int m = 1; m <= m_order; ++m) { + cplx roots[max_order]; + bessel_roots(m, roots); + + // Conjugate pairs (im > 0), most-damped first for determinism. + cplx pairs[max_order / 2]; + int pair_count = 0; + int real_idx = 0; + for (int i = 1; i < m; ++i) { + if (std::abs(roots[i].im) < std::abs(roots[real_idx].im)) real_idx = i; + } + for (int i = 0; i < m; ++i) { + if (m % 2 == 1 && i == real_idx) continue; + if (roots[i].im > 0.0) pairs[pair_count++] = roots[i]; + } + // Insertion sort: tiny fixed-size array (std::sort trips GCC's + // -Warray-bounds here — its insertion threshold exceeds 5). + for (int i = 1; i < pair_count; ++i) { + const cplx key = pairs[i]; + int j = i - 1; + for (; j >= 0 && pairs[j].re > key.re; --j) pairs[j + 1] = pairs[j]; + pairs[j + 1] = key; + } + for (int i = 0; i < pair_count; ++i) { + m_roots.push_back({pairs[i].re, pairs[i].im}); + } + if (m % 2 == 1) m_roots.push_back({roots[real_idx].re, 0.0}); + } + } + + /// Recompute the full coefficient table for the current distances, + /// speed of sound, and sample rate. Bilinear transform per section: + /// zeros are the theta_m roots scaled by 1/tau_src, poles the same + /// roots scaled by 1/tau_ref (tau = r/c). Double precision, cast to + /// float on store. Control thread; no allocation. + void rebuild_coefficients() { + const double c = static_cast(m_speed_of_sound); + const double t_s = static_cast(m_source_distance) / c; + const double t_r = static_cast(m_reference_distance) / c; + const double k = 2.0 * static_cast(m_fs); + const double k2 = k * k; + for (size_t idx = 0; idx < m_roots.size(); ++idx) { + const section_root r = m_roots[idx]; + float* cf = m_coefficients.data() + idx * k_section_coeffs; + if (r.im == 0.0) { + // First-order: analog (s - z)/(s - p), z = re/t_s, p = re/t_r. + const double z = r.re / t_s; + const double p = r.re / t_r; + const double d = k - p; // > 0: p < 0, k > 0 + cf[0] = static_cast((k - z) / d); + cf[1] = static_cast(-(k + z) / d); + cf[2] = 0.0f; + cf[3] = static_cast(-(k + p) / d); + cf[4] = 0.0f; + } + else { + // Conjugate pair: s^2 - 2 Re(x)/tau · s + |x|^2/tau^2. + const double m2 = r.re * r.re + r.im * r.im; + const double zr = r.re / t_s, zm = m2 / (t_s * t_s); + const double pr = r.re / t_r, pm = m2 / (t_r * t_r); + const double d0 = k2 - 2.0 * pr * k + pm; // > 0: pr < 0 + cf[0] = static_cast((k2 - 2.0 * zr * k + zm) / d0); + cf[1] = static_cast(2.0 * (zm - k2) / d0); + cf[2] = static_cast((k2 + 2.0 * zr * k + zm) / d0); + cf[3] = static_cast(2.0 * (pm - k2) / d0); + cf[4] = static_cast((k2 + 2.0 * pr * k + pm) / d0); + } + } + } + + void publish() { m_smooth.set(m_coefficients.data(), m_total_coeffs); } + }; + +} // namespace ambitap::dsp + +#endif // AMBITAP_DSP_NFC_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index ea13bda..be6acd7 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -15,6 +15,7 @@ add_executable(ambitap_tests test_dsp_encoder.cpp test_dsp_transforms.cpp test_dsp_stateful.cpp + test_nfc.cpp test_dsp_matrix.cpp test_dsp_binaural.cpp test_analysis_soundfield.cpp diff --git a/tests/test_nfc.cpp b/tests/test_nfc.cpp new file mode 100644 index 0000000..5d8bb25 --- /dev/null +++ b/tests/test_nfc.cpp @@ -0,0 +1,162 @@ +/// AmbiTap: target-independent ambisonics library +/// Tests for dsp::nfc — near-field compensation (NFC-HOA, Daniel). +/// Timothy Place +/// Copyright 2026 Timothy Place. + +#include "ambitap/dsp/nfc.h" + +#include + +#include +#include + +using namespace ambitap; + +namespace { + + /// Planar buffers + pointer tables for process(). + struct planar { + std::vector> in_bufs, out_bufs; + std::vector in; + std::vector out; + planar(size_t channels, size_t frames) + : in_bufs(channels, std::vector(frames, 0.f)) + , out_bufs(channels, std::vector(frames, 0.f)) { + for (size_t ch = 0; ch < channels; ++ch) { + in.push_back(in_bufs[ch].data()); + out.push_back(out_bufs[ch].data()); + } + } + }; + +} // namespace + +TEST(DspNfc, IdentityAtEqualDistances) { + // At r_src == r_ref every section's zero coincides with its pole and the + // digital coefficients reduce to an exact passthrough (b0 = 1, b1 = a1, + // b2 = a2), so the output must match the input to the last bit. + dsp::nfc filt(3); + filt.prepare(48000.f); + filt.set_source_distance(2.5f); + filt.set_reference_distance(2.5f); + filt.snap_parameters(); + + constexpr size_t frames = 512; + planar io(filt.channels(), frames); + for (size_t ch = 0; ch < filt.channels(); ++ch) { + for (size_t i = 0; i < frames; ++i) { + io.in_bufs[ch][i] = + std::sin(0.01f * static_cast(i + ch)) + 0.25f * static_cast(ch); + } + } + filt.process(io.in.data(), io.out.data(), frames); + + for (size_t ch = 0; ch < filt.channels(); ++ch) { + for (size_t i = 0; i < frames; ++i) { + ASSERT_FLOAT_EQ(io.out_bufs[ch][i], io.in_bufs[ch][i]) << "ch=" << ch << " i=" << i; + } + } +} + +TEST(DspNfc, DcGainMatchesClosedForm) { + // The order-m filter's DC gain has the closed form (r_ref / r_src)^m: + // a low shelf cut for sources beyond the reference distance, a boost + // inside it. Drive with DC and compare the steady state per channel. + struct scenario { + float rs, rref; + int order; + }; + const scenario cases[] = { + {2.0f, 1.0f, 5}, // cut: source beyond the reference + {0.5f, 1.0f, 5}, // boost: source inside the reference + {1.5f, 1.0f, max_order} // exercises the Bessel roots at every order + }; + + for (const auto& sc : cases) { + dsp::nfc filt(sc.order); + filt.prepare(48000.f); + filt.set_source_distance(sc.rs); + filt.set_reference_distance(sc.rref); + filt.snap_parameters(); + + const size_t C = filt.channels(); + std::vector in(C, 1.f), out(C, 0.f); + for (int i = 0; i < 48000; ++i) filt.process_frame(in.data(), out.data()); + + // Tolerance: the audio path is float32 (embedded contract), and at + // these corner frequencies the biquad denominator sum 1 + a1 + a2 + // cancels to ~1e-4, so coefficient quantization bounds the realized + // DC gain to ~0.3% (0.03 dB) of the closed form. + for (size_t ch = 0; ch < C; ++ch) { + const int m = acn_order(ch); + const float expected = std::pow(sc.rref / sc.rs, static_cast(m)); + EXPECT_EQ(expected, filt.dc_gain(m)); + EXPECT_NEAR(out[ch], expected, 1e-2f * expected) + << "rs=" << sc.rs << " rref=" << sc.rref << " ch=" << ch; + } + } +} + +TEST(DspNfc, SourceDistanceClampsToDocumentedMinimum) { + dsp::nfc filt(3); + filt.set_source_distance(0.001f); + EXPECT_EQ(filt.source_distance(), dsp::nfc::k_min_distance); + filt.set_reference_distance(0.f); + EXPECT_EQ(filt.reference_distance(), dsp::nfc::k_min_distance); +} + +TEST(DspNfc, ImpulseResponseDecaysAtAggressiveDistances) { + // The poles are the theta_m (Bessel) roots scaled by c / r_ref — all in + // the left half-plane, mapped inside the unit circle by the bilinear + // transform. Even at the minimum source distance and a far reference the + // impulse response must stay finite and die away. + for (int order = 1; order <= 5; ++order) { + dsp::nfc filt(order); + filt.prepare(48000.f); + filt.set_source_distance(0.01f); // clamps to k_min_distance + filt.set_reference_distance(3.f); + filt.snap_parameters(); + + const size_t C = filt.channels(); + std::vector in(C, 0.f), out(C, 0.f); + float peak = 0.f, tail = 0.f; + constexpr int total = 48000, tail_start = 44000; + for (int i = 0; i < total; ++i) { + const float v = (i == 0) ? 1.f : 0.f; + for (auto& x : in) x = v; + filt.process_frame(in.data(), out.data()); + for (size_t ch = 0; ch < C; ++ch) { + ASSERT_TRUE(std::isfinite(out[ch])) << "order=" << order << " i=" << i; + const float a = std::abs(out[ch]); + peak = std::max(peak, a); + if (i >= tail_start) tail = std::max(tail, a); + } + } + EXPECT_GT(peak, 0.f) << "order=" << order; + EXPECT_LT(tail, 1e-6f * peak) << "order=" << order; + } +} + +TEST(DspNfc, ParameterChangesRampWithoutSnap) { + // Without snap_parameters() a distance change ramps in over + // k_smoothing_samples; the first block after the change must neither jump + // instantly to the new response nor blow up. + dsp::nfc filt(2); + filt.prepare(48000.f); + filt.snap_parameters(); + + const size_t C = filt.channels(); + std::vector in(C, 1.f), out(C, 0.f); + for (int i = 0; i < 4800; ++i) filt.process_frame(in.data(), out.data()); + const float before = out[C - 1]; + + filt.set_source_distance(0.25f); // boost — ramped, not snapped + filt.process_frame(in.data(), out.data()); + EXPECT_NEAR(out[C - 1], before, 0.25f * std::abs(before) + 1e-3f); + + for (int i = 0; i < 48000; ++i) { + filt.process_frame(in.data(), out.data()); + for (size_t ch = 0; ch < C; ++ch) ASSERT_TRUE(std::isfinite(out[ch])); + } + EXPECT_NEAR(out[C - 1], filt.dc_gain(2), 1e-2f * filt.dc_gain(2)); +} diff --git a/tests/test_rt_safety.cpp b/tests/test_rt_safety.cpp index 95ea9ea..fb6158a 100644 --- a/tests/test_rt_safety.cpp +++ b/tests/test_rt_safety.cpp @@ -15,6 +15,7 @@ #include "ambitap/dsp/encoder.h" #include "ambitap/dsp/format_converter.h" #include "ambitap/dsp/mirror.h" +#include "ambitap/dsp/nfc.h" #include "ambitap/dsp/rotator.h" #include "ambitap/dsp/spatial_compressor.h" #include "ambitap/dsp/virtual_mic.h" @@ -246,6 +247,10 @@ TEST(RtSafety, StatefulProcessorsAreAllocationFree) { dsp::spatial_compressor comp(1); comp.prepare(48000.f); + dsp::nfc nf(3); + nf.prepare(48000.f); + nf.set_source_distance(0.5f); // mid-ramp exercises the smoothing path too + analysis::energy_vector ev; ev.prepare(48000.f); @@ -253,12 +258,15 @@ TEST(RtSafety, StatefulProcessorsAreAllocationFree) { grid.prepare(48000.f); planar io(4, frames); + planar io16(16, frames); std::vector ev_x(frames), ev_y(frames), ev_z(frames); float* ev_out[3] = {ev_x.data(), ev_y.data(), ev_z.data()}; rt_guard guard; dop.process(io.in.data(), io.out.data(), frames); comp.process(io.in.data(), io.out.data(), frames); + nf.process(io16.in.data(), io16.out.data(), frames); + nf.process_frame(io16.in[0], io16.out[0]); ev.process(io.in.data(), ev_out, frames); grid.process(io.in.data(), frames); EXPECT_EQ(guard.allocations(), 0); From 4090c92dd3c5434b51caa0b3a0af30f58421df77 Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 3 Jul 2026 19:37:32 +0000 Subject: [PATCH 3/3] nfc: run the section recurrence in double state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The AppleClang/arm64 CI leg failed DspNfc.IdentityAtEqualDistances: FMA contraction (on by default there) breaks the exact b==a cancellation in the transposed-DF2 recurrence, and the NFC sections' near-unit-circle poles (corners ~20 Hz at 48 kHz; 1+a1+a2 ~ 1e-4) amplify the last-ulp residue — measured growing to ~-65 dB over 4k samples with float32 state. Reproduced on Linux with clang -ffp-contract=fast -mfma. Keep float32 I/O and float coefficient ramping, but run the per-section state and arithmetic in double: the residue drops to ~-240 dB and the low-corner conditioning hazard goes away. The identity test now asserts a 1e-6 absolute bound (regression guard) instead of bit equality, with the rationale documented in both files. 121/121 green with and without -ffp-contract=fast -mfma; clang-format clean; freestanding profile compiles clean. Co-Authored-By: Claude Fable 5 Claude-Session: https://claude.ai/code/session_012VeadvCRUHJdneFNwRbFAM --- include/ambitap/dsp/nfc.h | 49 ++++++++++++++++++++++++--------------- tests/test_nfc.cpp | 12 +++++++--- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/include/ambitap/dsp/nfc.h b/include/ambitap/dsp/nfc.h index d3918b4..8215a6d 100644 --- a/include/ambitap/dsp/nfc.h +++ b/include/ambitap/dsp/nfc.h @@ -68,8 +68,13 @@ namespace ambitap::dsp { /// r_src → 0, so both distances are clamped to k_min_distance (0.1 m). /// Even so, small source distances at high orders are extreme by nature — /// e.g. r_src = 0.1 m, r_ref = 1 m at order 5 is a +100 dB bass boost. - /// The audio path is float32 (the embedded contract): the shelf corners - /// sit far below fs, so coefficient quantization bounds the realized + /// The audio I/O is float32 (the embedded contract), but the section + /// state and recurrence arithmetic are double: the shelf corners sit far + /// below fs, which puts the biquad poles within ~1e-4 of the unit circle + /// — float32 state lets per-sample rounding (and FMA contraction, which + /// AppleClang applies by default) accumulate through that near-resonance + /// to audible levels (~-65 dB observed). Double state keeps the residue + /// near -240 dB; float coefficient quantization still bounds the realized /// low-frequency gain to within about 0.03 dB of the closed form at /// 48 kHz and ordinary distances. /// @@ -118,9 +123,10 @@ namespace ambitap::dsp { std::array m_coefficients {}; smoothed_table m_smooth; - // Audio-thread filter state: 2 floats per (channel, section), + // Audio-thread filter state: 2 doubles per (channel, section), // order-major then channel-major — the traversal order of process(). - std::vector m_state; + // Double, not float: see the numerical-care note in the class comment. + std::vector m_state; public: /// @param order Ambisonics order in [0, max_order]; channel count is @@ -131,7 +137,7 @@ namespace ambitap::dsp { , m_channels(channel_count(m_order)) { compute_roots(); m_total_coeffs = k_section_coeffs * nfc_total_sections(m_order); - m_state.assign(state_size(), 0.0f); + m_state.assign(state_size(), 0.0); rebuild_coefficients(); m_smooth.init(m_coefficients.data(), m_total_coeffs); } @@ -183,7 +189,7 @@ namespace ambitap::dsp { void snap_parameters() { m_smooth.snap(); } /// Clear the filter state; keep coefficients and allocations. - void reset() { std::fill(m_state.begin(), m_state.end(), 0.0f); } + void reset() { std::fill(m_state.begin(), m_state.end(), 0.0); } /// Process one frame of channels() samples. Output may alias input. /// Audio thread; wait-free. @@ -259,23 +265,28 @@ namespace ambitap::dsp { private: /// One transposed-direct-form-II tick. First-order sections are stored /// as biquads with b2 = a2 = 0, so one kernel serves both. - static float section_tick(float x, const float* c, float* s) noexcept { - const float y = c[0] * x + s[0]; - s[0] = c[1] * x - c[3] * y + s[1]; - s[1] = c[2] * x - c[4] * y; - return y; + static float section_tick(float x, const float* c, double* s) noexcept { + const double xd = static_cast(x); + const double y = static_cast(c[0]) * xd + s[0]; + s[0] = static_cast(c[1]) * xd - static_cast(c[3]) * y + s[1]; + s[1] = static_cast(c[2]) * xd - static_cast(c[4]) * y; + return static_cast(y); } static void section_block(const float* x, float* y, size_t n, const float* c, - float* s) noexcept { - const float b0 = c[0], b1 = c[1], b2 = c[2], a1 = c[3], a2 = c[4]; - float s1 = s[0], s2 = s[1]; + double* s) noexcept { + const double b0 = static_cast(c[0]); + const double b1 = static_cast(c[1]); + const double b2 = static_cast(c[2]); + const double a1 = static_cast(c[3]); + const double a2 = static_cast(c[4]); + double s1 = s[0], s2 = s[1]; for (size_t i = 0; i < n; ++i) { - const float xi = x[i]; - const float yi = b0 * xi + s1; - s1 = b1 * xi - a1 * yi + s2; - s2 = b2 * xi - a2 * yi; - y[i] = yi; + const double xi = static_cast(x[i]); + const double yi = b0 * xi + s1; + s1 = b1 * xi - a1 * yi + s2; + s2 = b2 * xi - a2 * yi; + y[i] = static_cast(yi); } s[0] = s1; s[1] = s2; diff --git a/tests/test_nfc.cpp b/tests/test_nfc.cpp index 5d8bb25..6834398 100644 --- a/tests/test_nfc.cpp +++ b/tests/test_nfc.cpp @@ -33,8 +33,14 @@ namespace { TEST(DspNfc, IdentityAtEqualDistances) { // At r_src == r_ref every section's zero coincides with its pole and the - // digital coefficients reduce to an exact passthrough (b0 = 1, b1 = a1, - // b2 = a2), so the output must match the input to the last bit. + // digital coefficients reduce to a passthrough (b0 = 1, b1 = a1, b2 = a2). + // The cancellation is not bit-exact under floating-point contraction + // (AppleClang/arm64 fuses b1*x - a1*y into an FMA by default), and the + // near-unit-circle poles of these low-corner sections amplify any such + // residue — with float32 state that reached ~-65 dB, which is why the + // recurrence runs in double (see nfc.h). Assert a tight absolute bound: + // double state keeps the residue near -240 dB, so 1e-6 has huge margin + // while still failing loudly if the state precision ever regresses. dsp::nfc filt(3); filt.prepare(48000.f); filt.set_source_distance(2.5f); @@ -53,7 +59,7 @@ TEST(DspNfc, IdentityAtEqualDistances) { for (size_t ch = 0; ch < filt.channels(); ++ch) { for (size_t i = 0; i < frames; ++i) { - ASSERT_FLOAT_EQ(io.out_bufs[ch][i], io.in_bufs[ch][i]) << "ch=" << ch << " i=" << i; + ASSERT_NEAR(io.out_bufs[ch][i], io.in_bufs[ch][i], 1e-6f) << "ch=" << ch << " i=" << i; } } }