diff --git a/THIRD_PARTY_NOTICES b/THIRD_PARTY_NOTICES new file mode 100644 index 0000000000..957e20ffc3 --- /dev/null +++ b/THIRD_PARTY_NOTICES @@ -0,0 +1,61 @@ +This file reproduces copyright notices and license terms for third-party +software components that are compiled into Element Call's distributed bundle +and carry obligations to reproduce their notices in binary distributions. + +------------------------------------------------------------------------------- + +rnnoise — Recurrent neural network for audio noise reduction +Compiled to WebAssembly and bundled via @jitsi/rnnoise-wasm +https://github.com/xiph/rnnoise + +Copyright (c) 2017, Mozilla +Copyright (c) 2007-2017, Jean-Marc Valin +Copyright (c) 2005-2017, Xiph.Org Foundation +Copyright (c) 2003-2004, Mark Borgerding + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +- Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +- Neither the name of the Xiph.Org Foundation nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------------------------- + +@jitsi/rnnoise-wasm — WebAssembly build and JS wrapper for rnnoise +https://github.com/jitsi/rnnoise-wasm + +Copyright (c) 2013 ESTOS GmbH +Copyright (c) 2013 BlueJimp SARL + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/locales/en/app.json b/locales/en/app.json index 0b0ac7b4dc..ce7e0c2f71 100644 --- a/locales/en/app.json +++ b/locales/en/app.json @@ -196,7 +196,14 @@ "settings": { "audio_tab": { "effect_volume_description": "Adjust the volume at which reactions and hand raised effects play.", - "effect_volume_label": "Sound effect volume" + "effect_volume_label": "Sound effect volume", + "rnnoise_header": "Noise suppression", + "rnnoise_label": "Enable enhanced noise suppression (RNNoise)", + "rnnoise_not_supported": "(Enhanced noise suppression is not supported by this browser.)", + "rnnoise_preset_balanced": "Balanced", + "rnnoise_preset_conservative": "Conservative", + "rnnoise_preset_description": "Pick a suppression profile. Stronger modes remove more keyboard noise but can sound more processed.", + "rnnoise_preset_strong": "Strong" }, "background_blur_header": "Background", "background_blur_label": "Blur the background of the video", diff --git a/package.json b/package.json index 705b0f103d..b7c8733848 100644 --- a/package.json +++ b/package.json @@ -140,5 +140,8 @@ "@livekit/components-core/rxjs": "^7.8.1", "@livekit/track-processors/@mediapipe/tasks-vision": "^0.10.18" }, - "packageManager": "yarn@4.7.0" + "packageManager": "yarn@4.7.0", + "dependencies": { + "@jitsi/rnnoise-wasm": "0.2.1" + } } diff --git a/playwright.config.ts b/playwright.config.ts index 4fb86b95b8..794d82bd75 100644 --- a/playwright.config.ts +++ b/playwright.config.ts @@ -11,6 +11,32 @@ import { defineConfig, devices } from "@playwright/test"; const baseURL = process.env.USE_DOCKER ? 
"http://localhost:8080" : "https://localhost:3000"; +const fakeAudioCaptureFile = process.env.PLAYWRIGHT_FAKE_AUDIO_CAPTURE_FILE; +const fakeVideoCaptureFile = process.env.PLAYWRIGHT_FAKE_VIDEO_CAPTURE_FILE; +const disableChromiumSandbox = + process.env.PLAYWRIGHT_DISABLE_CHROMIUM_SANDBOX === "1"; + +function buildFakeMediaArgs(): string[] { + const args = [ + "--use-fake-ui-for-media-stream", + "--use-fake-device-for-media-stream", + "--mute-audio", + ]; + + if (fakeAudioCaptureFile) { + args.push(`--use-file-for-fake-audio-capture=${fakeAudioCaptureFile}`); + } + if (fakeVideoCaptureFile) { + args.push(`--use-file-for-fake-video-capture=${fakeVideoCaptureFile}`); + } + + return args; +} + +const fakeMediaArgs = buildFakeMediaArgs(); +const chromiumLaunchArgs = disableChromiumSandbox + ? [...fakeMediaArgs, "--no-sandbox", "--disable-setuid-sandbox"] + : fakeMediaArgs; /** * See https://playwright.dev/docs/test-configuration. @@ -49,12 +75,9 @@ export default defineConfig({ "camera", ], ignoreHTTPSErrors: true, + chromiumSandbox: !disableChromiumSandbox, launchOptions: { - args: [ - "--use-fake-ui-for-media-stream", - "--use-fake-device-for-media-stream", - "--mute-audio", - ], + args: chromiumLaunchArgs, }, }, }, @@ -84,12 +107,9 @@ export default defineConfig({ "microphone", "camera", ], + chromiumSandbox: !disableChromiumSandbox, launchOptions: { - args: [ - "--use-fake-ui-for-media-stream", - "--use-fake-device-for-media-stream", - "--mute-audio", - ], + args: chromiumLaunchArgs, }, }, }, diff --git a/playwright/spa-call-sticky.spec.ts b/playwright/spa-call-sticky.spec.ts index 246b4a73a2..d328770d9c 100644 --- a/playwright/spa-call-sticky.spec.ts +++ b/playwright/spa-call-sticky.spec.ts @@ -7,18 +7,24 @@ Please see LICENSE in the repository root for full details. 
import { expect, + type Locator, type Page, test, type Request, type Browser, + type ConsoleMessage, } from "@playwright/test"; import { SpaHelpers } from "./spa-helpers"; +const RNNOISE_LABEL = "Enable enhanced noise suppression (RNNoise)"; +const RNNOISE_TOGGLE_SELECTOR = "#activateRNNoiseSuppression"; + async function setupTwoUserSpaCall( browser: Browser, page: Page, browserName: string, + callName = `HelloCall-${Date.now()}-${Math.floor(Math.random() * 10000)}`, ): Promise<{ guestPage: Page }> { test.skip( browserName === "firefox", @@ -39,7 +45,7 @@ async function setupTwoUserSpaCall( }, ); - await SpaHelpers.createCall(page, "Androl", "HelloCall", true, "2_0"); + await SpaHelpers.createCall(page, "Androl", callName, true, "2_0"); const inviteLink = await SpaHelpers.getCallInviteLink(page); @@ -70,8 +76,8 @@ async function setupTwoUserSpaCall( "2_0", ); // Assert both sides have sent sticky membership events - expect(androlHasSentStickyEvent).toEqual(true); - expect(pevaraHasSentStickyEvent).toEqual(true); + await expect.poll(() => androlHasSentStickyEvent).toBe(true); + await expect.poll(() => pevaraHasSentStickyEvent).toBe(true); return { guestPage }; } @@ -114,6 +120,142 @@ test("One to One rejoin after improper leave does not crash EC", async ({ await expect(guestPage.getByTestId("videoTile")).toHaveCount(2); }); +test.describe("RNNoise scenarios", () => { + test.describe.configure({ mode: "serial" }); + + test.skip( + ({ browserName }) => browserName !== "chromium", + "RNNoise scenarios are validated on Chromium fake-media infrastructure.", + ); + + test("One to One rejoin after improper leave stays stable with RNNoise enabled", async ({ + browser, + page, + browserName, + }) => { + const { guestPage } = await setupTwoUserSpaCall(browser, page, browserName); + + await SpaHelpers.expectVideoTilesCount(page, 2); + await SpaHelpers.expectVideoTilesCount(guestPage, 2); + + const rnnoiseSupported = await enableRNNoiseInSettings(guestPage); + test.skip( + 
!rnnoiseSupported, + "RNNoise is not supported in this browser environment", + ); + + await expect + .poll(async () => + guestPage.evaluate(() => + localStorage.getItem("matrix-setting-rnnoise-noise-suppression"), + ), + ) + .toBe("true"); + + await guestPage.reload(); + await expect(guestPage.getByTestId("lobby_joinCall")).toBeVisible(); + await guestPage.getByTestId("lobby_joinCall").click(); + + // Rejoin after abrupt disconnect should remain stable with RNNoise enabled. + await expect(page.getByTestId("videoTile")).toHaveCount(3); + await expect(guestPage.getByTestId("videoTile")).toHaveCount(2); + await expect( + guestPage.getByRole("button", { name: "Mute microphone" }), + ).toBeVisible(); + + await expectRNNoiseEnabledInSettings(guestPage); + }); + + test("One to One call stays stable when switching devices with RNNoise enabled", async ({ + browser, + page, + browserName, + }) => { + const { guestPage } = await setupTwoUserSpaCall(browser, page, browserName); + + await SpaHelpers.expectVideoTilesCount(page, 2); + await SpaHelpers.expectVideoTilesCount(guestPage, 2); + + const rnnoiseSupported = await enableRNNoiseInSettings(guestPage); + test.skip( + !rnnoiseSupported, + "RNNoise is not supported in this browser environment", + ); + + const rnnoiseErrors: string[] = []; + const consoleHandler = (message: ConsoleMessage): void => { + if ( + message.type() === "error" && + /rnnoise|audio\s*worklet/i.test(message.text()) + ) { + rnnoiseErrors.push(message.text()); + } + }; + guestPage.on("console", consoleHandler); + + await openAudioSettings(guestPage); + const microphoneDeviceRadios = await getDeviceSelectionRadios( + guestPage, + "Microphone", + ); + + // Some Chromium fake-device environments expose only one audio-input device, + // so device switching cannot be forced there. Fall back to output switching. 
+ if (microphoneDeviceRadios.count < 2) { + const speakerDeviceRadios = await getDeviceSelectionRadios( + guestPage, + "Speaker", + ); + expect(speakerDeviceRadios.count).toBeGreaterThan(0); + + if (speakerDeviceRadios.count > 1) { + const selectedSpeakerBefore = await guestPage.evaluate(() => + localStorage.getItem("matrix-setting-audio-output"), + ); + const targetSpeakerIndex = + speakerDeviceRadios.firstUncheckedIndex >= 0 + ? speakerDeviceRadios.firstUncheckedIndex + : 0; + await speakerDeviceRadios.radios.nth(targetSpeakerIndex).click(); + await expect + .poll(async () => + guestPage.evaluate(() => + localStorage.getItem("matrix-setting-audio-output"), + ), + ) + .not.toBe(selectedSpeakerBefore); + } + } else { + const selectedMicrophoneBefore = await guestPage.evaluate(() => + localStorage.getItem("matrix-setting-audio-input"), + ); + const targetMicrophoneIndex = + microphoneDeviceRadios.firstUncheckedIndex >= 0 + ? microphoneDeviceRadios.firstUncheckedIndex + : 1; + await microphoneDeviceRadios.radios.nth(targetMicrophoneIndex).click(); + await expect + .poll(async () => + guestPage.evaluate(() => + localStorage.getItem("matrix-setting-audio-input"), + ), + ) + .not.toBe(selectedMicrophoneBefore); + } + + await guestPage.getByTestId("modal_close").click(); + await SpaHelpers.expectVideoTilesCount(page, 2); + await SpaHelpers.expectVideoTilesCount(guestPage, 2); + await expect( + guestPage.getByRole("button", { name: "Mute microphone" }), + ).toBeVisible(); + await expectRNNoiseEnabledInSettings(guestPage); + expect(rnnoiseErrors).toEqual([]); + + guestPage.off("console", consoleHandler); + }); +}); + function isStickySend(url: string): boolean { return !!new URL(url).searchParams.get( "org.matrix.msc4354.sticky_duration_ms", @@ -133,3 +275,63 @@ async function interceptEventSend( }, ); } + +async function openAudioSettings(page: Page): Promise<void> { + await page.getByRole("button", { name: "Settings" }).click(); + await page.getByRole("tab", { name: "Audio" 
}).click(); +} + +async function getDeviceSelectionRadios( + page: Page, + sectionHeading: string, +): Promise<{ + radios: Locator; + count: number; + firstUncheckedIndex: number; +}> { + const section = page + .locator("div") + .filter({ + has: page.getByRole("heading", { name: sectionHeading, exact: true }), + }) + .first(); + const radios = section.getByRole("radio"); + const count = await radios.count(); + const firstUncheckedIndex = await radios.evaluateAll((nodes) => + nodes.findIndex((node) => { + if (node instanceof HTMLInputElement) { + return !node.checked; + } + return node.getAttribute("aria-checked") !== "true"; + }), + ); + + return { radios, count, firstUncheckedIndex }; +} + +async function enableRNNoiseInSettings(page: Page): Promise<boolean> { + await openAudioSettings(page); + + const rnnoiseLabel = page.locator("label", { hasText: RNNOISE_LABEL }); + await expect(rnnoiseLabel).toBeVisible(); + const rnnoiseToggle = page.locator(RNNOISE_TOGGLE_SELECTOR); + const supported = await rnnoiseToggle.isEnabled(); + if (supported && !(await rnnoiseToggle.isChecked())) { + await rnnoiseLabel.click(); + await expect(rnnoiseToggle).toBeChecked(); + } + + await page.getByTestId("modal_close").click(); + return supported; +} + +async function expectRNNoiseEnabledInSettings(page: Page): Promise<void> { + await openAudioSettings(page); + + const rnnoiseLabel = page.locator("label", { hasText: RNNOISE_LABEL }); + await expect(rnnoiseLabel).toBeVisible(); + const rnnoiseToggle = page.locator(RNNOISE_TOGGLE_SELECTOR); + await expect(rnnoiseToggle).toBeChecked(); + + await page.getByTestId("modal_close").click(); +} diff --git a/playwright/spa-helpers.ts b/playwright/spa-helpers.ts index 648f6e6e47..7e91e25247 100644 --- a/playwright/spa-helpers.ts +++ b/playwright/spa-helpers.ts @@ -95,6 +95,7 @@ async function setRtcModeFromSettings( page: Page, mode: RtcMode, ): Promise<void> { + await expect(page.getByRole("button", { name: "Settings" })).toBeVisible(); await 
page.getByRole("button", { name: "Settings" }).click(); await page.getByRole("tab", { name: "Preferences" }).click(); await page.getByText("Developer mode", { exact: true }).check(); // Idempotent: won't uncheck if already checked diff --git a/src/audio/RNNoiseProcessor.test.ts b/src/audio/RNNoiseProcessor.test.ts new file mode 100644 index 0000000000..a026fac7cf --- /dev/null +++ b/src/audio/RNNoiseProcessor.test.ts @@ -0,0 +1,595 @@ +/* +Copyright 2026 Element Creations Ltd. + +SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial +Please see LICENSE in the repository root for full details. +*/ + +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import { Track } from "livekit-client"; +import { logger } from "matrix-js-sdk/lib/logger"; + +import { + createRNNoiseWorkletCodeForTesting, + RNNoiseProcessor, + supportsRNNoiseProcessor, +} from "./RNNoiseProcessor"; + +const RNNOISE_WORKLET_MODULE_URL = "/assets/RNNoiseWorkletModule.js"; + +vi.mock("./RNNoiseWorkletModule.ts?url", () => ({ + default: "/assets/RNNoiseWorkletModule.js", +})); + +type TestContext = { + addModule: ReturnType<typeof vi.fn>; + createSourceNode: ReturnType<typeof vi.fn>; + createDestinationNode: ReturnType<typeof vi.fn>; + sourceNode: MediaStreamAudioSourceNode; + destinationNode: MediaStreamAudioDestinationNode; + processedTrack: MediaStreamTrack & { stop: ReturnType<typeof vi.fn> }; + workletNode: AudioWorkletNode; + audioContext: AudioContext; + track: MediaStreamTrack; +}; + +function createTestContext(sampleRate = 48000): TestContext { + const processedTrack = { + id: "processed-track", + stop: vi.fn(), + } as unknown as MediaStreamTrack & { stop: ReturnType<typeof vi.fn> }; + const sourceNode = { + connect: vi.fn(), + disconnect: vi.fn(), + } as unknown as MediaStreamAudioSourceNode; + const destinationNode = { + stream: { + getAudioTracks: () => [processedTrack], + }, + disconnect: vi.fn(), + } as unknown as MediaStreamAudioDestinationNode; + const workletNode = { + connect: vi.fn(), + disconnect: vi.fn(), + 
port: { + postMessage: vi.fn(), + }, + } as unknown as AudioWorkletNode; + const addModule = vi.fn().mockResolvedValue(undefined); + const createSourceNode = vi.fn().mockReturnValue(sourceNode); + const createDestinationNode = vi.fn().mockReturnValue(destinationNode); + const audioContext = { + sampleRate, + audioWorklet: { addModule }, + createMediaStreamSource: createSourceNode, + createMediaStreamDestination: createDestinationNode, + } as unknown as AudioContext; + const track = { + id: "input-track", + kind: Track.Kind.Audio, + } as MediaStreamTrack; + + return { + addModule, + createSourceNode, + createDestinationNode, + sourceNode, + destinationNode, + processedTrack, + workletNode, + audioContext, + track, + }; +} + +function getGeneratedWorkletCode(): string { + return createRNNoiseWorkletCodeForTesting( + "function createRNNWasmModuleSync(){}; export default createRNNWasmModuleSync;", + ); +} + +type WorkletPresetConfig = { + maxAttenuationDb: number; + openThreshold: number; + closeThreshold: number; + holdFrames: number; + attenuateMs: number; + releaseMs: number; +}; + +function getPresetConfig( + workletCode: string, + preset: "conservative" | "balanced" | "strong", +): WorkletPresetConfig { + const presetMatch = workletCode.match( + new RegExp(`${preset}:\\s*\\{([\\s\\S]*?)\\n\\s*\\},`), + ); + if (!presetMatch) { + throw new Error(`Could not find ${preset} preset in worklet code.`); + } + const presetBlock = presetMatch[1]; + const readNumber = (key: keyof WorkletPresetConfig): number => { + const keyMatch = presetBlock.match(new RegExp(`${key}:\\s*([0-9.]+)`)); + if (!keyMatch) { + throw new Error(`Could not find ${key} in ${preset} preset.`); + } + return Number(keyMatch[1]); + }; + + return { + maxAttenuationDb: readNumber("maxAttenuationDb"), + openThreshold: readNumber("openThreshold"), + closeThreshold: readNumber("closeThreshold"), + holdFrames: readNumber("holdFrames"), + attenuateMs: readNumber("attenuateMs"), + releaseMs: 
readNumber("releaseMs"), + }; +} + +function expectedAttenuationDb( + config: WorkletPresetConfig, + vadProbability: number, +): number { + if (vadProbability >= config.openThreshold) { + return 0; + } + + const thresholdRange = config.openThreshold - config.closeThreshold; + const attenuationProgress = + thresholdRange > 0 + ? Math.max( + 0, + Math.min( + 1, + (config.openThreshold - vadProbability) / thresholdRange, + ), + ) + : 1; + return attenuationProgress * config.maxAttenuationDb; +} + +function instantiateWorkletProcessor(workletCode: string): { + process: ( + inputs: Float32Array[][], + outputs: Float32Array[][], + params?: Record<string, Float32Array>, + ) => boolean; +} { + let ProcessorCtor: + | (new () => { + process: ( + inputs: Float32Array[][], + outputs: Float32Array[][], + params?: Record<string, Float32Array>, + ) => boolean; + }) + | undefined; + + class TestAudioWorkletProcessor { + public readonly port = { + postMessage: vi.fn(), + onmessage: null as ((event: MessageEvent) => void) | null, + }; + } + + const registerProcessor = vi.fn( + ( + _name: string, + ctor: new () => { + process: ( + inputs: Float32Array[][], + outputs: Float32Array[][], + params?: Record<string, Float32Array>, + ) => boolean; + }, + ) => { + ProcessorCtor = ctor; + }, + ); + + const runWorkletModule = new Function( + "AudioWorkletProcessor", + "registerProcessor", + workletCode, + ); + runWorkletModule(TestAudioWorkletProcessor, registerProcessor); + + if (!ProcessorCtor) { + throw new Error("Expected worklet processor to be registered."); + } + + return new ProcessorCtor(); +} + +describe("RNNoiseProcessor", () => { + beforeEach(() => { + vi.stubGlobal( + "MediaStream", + class MediaStream { + public constructor(_tracks?: MediaStreamTrack[]) {} + }, + ); + }); + + afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); + }); + + it("initializes audio graph and exposes processed track", async () => { + const t = createTestContext(); + vi.stubGlobal("AudioWorkletNode", vi.fn().mockReturnValue(t.workletNode)); + const 
processor = new RNNoiseProcessor("balanced"); + + await processor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + + expect(t.addModule).toHaveBeenCalledWith(RNNOISE_WORKLET_MODULE_URL); + expect(t.createSourceNode).toHaveBeenCalledOnce(); + expect(t.createDestinationNode).toHaveBeenCalledOnce(); + expect(t.workletNode.port.postMessage).toHaveBeenCalledWith({ + type: "preset", + preset: "balanced", + }); + expect(processor.processedTrack).toBe(t.processedTrack); + }); + + it("destroys processing graph and is idempotent", async () => { + const t = createTestContext(); + vi.stubGlobal("AudioWorkletNode", vi.fn().mockReturnValue(t.workletNode)); + const processor = new RNNoiseProcessor(); + + await processor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + await processor.destroy(); + await processor.destroy(); + + expect(t.sourceNode.disconnect).toHaveBeenCalledOnce(); + expect(t.workletNode.disconnect).toHaveBeenCalledOnce(); + expect(t.destinationNode.disconnect).toHaveBeenCalledOnce(); + expect(t.workletNode.port.postMessage).toHaveBeenCalledWith({ + type: "destroy", + }); + expect(t.processedTrack.stop).toHaveBeenCalledOnce(); + expect(processor.processedTrack).toBeUndefined(); + }); + + it("destroy does not throw when no processed track exists", async () => { + const processor = new RNNoiseProcessor(); + + await expect(processor.destroy()).resolves.toBeUndefined(); + }); + + it("restart re-initializes with a new processed track", async () => { + const first = createTestContext(); + const second = createTestContext(); + const workletCtor = vi + .fn() + .mockReturnValueOnce(first.workletNode) + .mockReturnValueOnce(second.workletNode); + vi.stubGlobal("AudioWorkletNode", workletCtor); + + const processor = new RNNoiseProcessor(); + await processor.init({ + kind: Track.Kind.Audio, + track: first.track, + audioContext: first.audioContext, + }); + const firstProcessedTrack = 
processor.processedTrack; + + await processor.restart({ + kind: Track.Kind.Audio, + track: second.track, + audioContext: second.audioContext, + }); + + expect(processor.processedTrack).toBe(second.processedTrack); + expect(processor.processedTrack).not.toBe(firstProcessedTrack); + }); + + it("loads the worklet module once per AudioContext", async () => { + const t = createTestContext(); + vi.stubGlobal("AudioWorkletNode", vi.fn().mockReturnValue(t.workletNode)); + const firstProcessor = new RNNoiseProcessor(); + const secondProcessor = new RNNoiseProcessor(); + + await firstProcessor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + await secondProcessor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + + expect(t.addModule).toHaveBeenCalledOnce(); + }); + + it("reports support based on AudioWorklet availability", () => { + expect(supportsRNNoiseProcessor()).toBe(false); + vi.stubGlobal("AudioWorkletNode", class AudioWorkletNode {}); + vi.stubGlobal( + "MediaStreamAudioDestinationNode", + class MediaStreamAudioDestinationNode {}, + ); + vi.stubGlobal( + "MediaStreamAudioSourceNode", + class MediaStreamAudioSourceNode {}, + ); + vi.stubGlobal( + "AudioWorklet", + class AudioWorkletWithoutAddModule {}, + ); + expect(supportsRNNoiseProcessor()).toBe(false); + vi.stubGlobal( + "AudioWorklet", + class AudioWorklet { + public async addModule(): Promise<void> { + await Promise.resolve(); + } + }, + ); + expect(supportsRNNoiseProcessor()).toBe(true); + }); + + it("updates worklet preset at runtime", async () => { + const t = createTestContext(); + vi.stubGlobal("AudioWorkletNode", vi.fn().mockReturnValue(t.workletNode)); + const processor = new RNNoiseProcessor(); + + await processor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + + processor.setPreset("strong"); + + expect(t.workletNode.port.postMessage).toHaveBeenCalledWith({ + type: "preset", + preset: 
"strong", + }); + }); + + it("bypasses RNNoise for unsupported audio context sample rates", async () => { + const t = createTestContext(44100); + const workletCtor = vi.fn().mockReturnValue(t.workletNode); + const warningSpy = vi.spyOn(logger, "warn"); + vi.stubGlobal("AudioWorkletNode", workletCtor); + const processor = new RNNoiseProcessor(); + + await expect( + processor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }), + ).rejects.toThrow("48000Hz"); + + expect(warningSpy).toHaveBeenCalledOnce(); + expect(t.addModule).not.toHaveBeenCalled(); + expect(workletCtor).not.toHaveBeenCalled(); + expect(processor.processedTrack).toBeUndefined(); + }); + + it("propagates worklet registration failures", async () => { + const t = createTestContext(); + const workletCtor = vi.fn().mockReturnValue(t.workletNode); + const addModuleError = new Error("Failed to register worklet module"); + t.addModule.mockRejectedValueOnce(addModuleError); + vi.stubGlobal("AudioWorkletNode", workletCtor); + const processor = new RNNoiseProcessor(); + + await expect( + processor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }), + ).rejects.toThrow(addModuleError); + + expect(workletCtor).not.toHaveBeenCalled(); + }); + + it("restarts with the last known audio context when restart omits audioContext", async () => { + const t = createTestContext(); + const workletCtor = vi.fn().mockReturnValue(t.workletNode); + vi.stubGlobal("AudioWorkletNode", workletCtor); + const processor = new RNNoiseProcessor(); + + await processor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + + const restartedTrack = { + id: "restarted-input-track", + kind: Track.Kind.Audio, + } as MediaStreamTrack; + await processor.restart({ + kind: Track.Kind.Audio, + track: restartedTrack, + // LiveKit restart paths can omit audioContext. 
+ audioContext: undefined as unknown as AudioContext, + }); + + expect(t.addModule).toHaveBeenCalledOnce(); + expect(t.createSourceNode).toHaveBeenCalledTimes(2); + expect(workletCtor).toHaveBeenCalledTimes(2); + }); + + it("deterministically downmixes stereo input to mono in the worklet passthrough path", () => { + const workletCode = getGeneratedWorkletCode(); + const worklet = instantiateWorkletProcessor(workletCode); + const left = new Float32Array([1, -1, 0.5, 0]); + const right = new Float32Array([0, 1, -0.5, 0.5]); + const output = new Float32Array(left.length); + + const keepProcessing = worklet.process([[left, right]], [[output]], {}); + + expect(keepProcessing).toBe(true); + expect(output).toEqual(new Float32Array([0.5, 0, 0, 0.25])); + expect(output).toHaveLength(left.length); + }); + + it("downmixes all input channels by averaging each sample", () => { + const workletCode = getGeneratedWorkletCode(); + const worklet = instantiateWorkletProcessor(workletCode); + const first = new Float32Array([0.6, -0.3, 0.9]); + const second = new Float32Array([0.3, 0.3, -0.3]); + const third = new Float32Array([0, 0.6, 0]); + const output = new Float32Array(first.length); + + worklet.process([[first, second, third]], [[output]], {}); + + expect(output[0]).toBeCloseTo(0.3, 6); + expect(output[1]).toBeCloseTo(0.2, 6); + expect(output[2]).toBeCloseTo(0.2, 6); + expect(output).toHaveLength(first.length); + }); + + it("keeps the balanced preset tuning unchanged", () => { + const balanced = getPresetConfig(getGeneratedWorkletCode(), "balanced"); + + expect(balanced).toEqual({ + maxAttenuationDb: 8, + openThreshold: 0.9, + closeThreshold: 0.55, + holdFrames: 10, + attenuateMs: 90, + releaseMs: 22, + }); + }); + + it("maps strong preset to a more aggressive profile than balanced", () => { + const workletCode = getGeneratedWorkletCode(); + const balanced = getPresetConfig(workletCode, "balanced"); + const strong = getPresetConfig(workletCode, "strong"); + + 
expect(strong.maxAttenuationDb).toBeGreaterThan(balanced.maxAttenuationDb); + expect(strong.openThreshold).toBeGreaterThanOrEqual(balanced.openThreshold); + expect(strong.closeThreshold).toBeGreaterThanOrEqual( + balanced.closeThreshold, + ); + expect(strong.holdFrames).toBeLessThan(balanced.holdFrames); + expect(strong.attenuateMs).toBeLessThan(balanced.attenuateMs); + }); + + it("applies lower expected noise-floor gain on strong than balanced", () => { + const workletCode = getGeneratedWorkletCode(); + const balanced = getPresetConfig(workletCode, "balanced"); + const strong = getPresetConfig(workletCode, "strong"); + const noiseLikeVadProbabilities = [0.2, 0.4, 0.6, 0.8]; + + for (const vad of noiseLikeVadProbabilities) { + expect(expectedAttenuationDb(strong, vad)).toBeGreaterThanOrEqual( + expectedAttenuationDb(balanced, vad), + ); + } + + const balancedSilenceGain = Math.pow(10, -balanced.maxAttenuationDb / 20); + const strongSilenceGain = Math.pow(10, -strong.maxAttenuationDb / 20); + expect(strongSilenceGain).toBeLessThan(balancedSilenceGain); + }); + + it("init() called twice without destroy() cleans up previous nodes", async () => { + const first = createTestContext(); + const second = createTestContext(); + const workletCtor = vi + .fn() + .mockReturnValueOnce(first.workletNode) + .mockReturnValueOnce(second.workletNode); + vi.stubGlobal("AudioWorkletNode", workletCtor); + + const processor = new RNNoiseProcessor(); + await processor.init({ + kind: Track.Kind.Audio, + track: first.track, + audioContext: first.audioContext, + }); + await processor.init({ + kind: Track.Kind.Audio, + track: second.track, + audioContext: second.audioContext, + }); + + // First nodes must have been torn down + expect(first.sourceNode.disconnect).toHaveBeenCalledOnce(); + expect(first.workletNode.disconnect).toHaveBeenCalledOnce(); + expect(first.destinationNode.disconnect).toHaveBeenCalledOnce(); + expect(first.processedTrack.stop).toHaveBeenCalledOnce(); + + // Second nodes 
should now be active + expect(processor.processedTrack).toBe(second.processedTrack); + }); + + it("concurrent ensureWorkletRegistered calls only invoke addModule once", async () => { + const t = createTestContext(); + // Replace the default resolved mock with a controllable promise so both + // init() calls are in-flight at the same time. + let resolveAddModule!: () => void; + t.addModule.mockReturnValueOnce( + new Promise<void>((resolve) => { + resolveAddModule = resolve; + }), + ); + vi.stubGlobal("AudioWorkletNode", vi.fn().mockReturnValue(t.workletNode)); + + const processor1 = new RNNoiseProcessor(); + const processor2 = new RNNoiseProcessor(); + + // Start both inits before either addModule resolves + const init1 = processor1.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + const init2 = processor2.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + + resolveAddModule(); + await Promise.all([init1, init2]); + + expect(t.addModule).toHaveBeenCalledOnce(); + }); + + it("does not expose an active graph after concurrent init and destroy", async () => { + const t = createTestContext(); + let resolveAddModule!: () => void; + t.addModule.mockReturnValueOnce( + new Promise<void>((resolve) => { + resolveAddModule = resolve; + }), + ); + vi.stubGlobal("AudioWorkletNode", vi.fn().mockReturnValue(t.workletNode)); + + const processor = new RNNoiseProcessor(); + const initPromise = processor.init({ + kind: Track.Kind.Audio, + track: t.track, + audioContext: t.audioContext, + }); + + // destroy() races with the in-flight init(); let destroy complete first + await processor.destroy(); + + // Now let the worklet registration resolve and init() resume + resolveAddModule(); + await initPromise; + + // init() must have aborted after seeing destroyed=true; no track exposed + expect(processor.processedTrack).toBeUndefined(); + }); +}); diff --git a/src/audio/RNNoiseProcessor.ts b/src/audio/RNNoiseProcessor.ts new 
file mode 100644
index 0000000000..648267bd3e
--- /dev/null
+++ b/src/audio/RNNoiseProcessor.ts
@@ -0,0 +1,469 @@
+/*
+Copyright 2026 Element Creations Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+import { logger } from "matrix-js-sdk/lib/logger";
+
+import type {
+  AudioProcessorOptions,
+  Track,
+  TrackProcessor,
+} from "livekit-client";
+import type { RNNoiseSuppressionPreset } from "./rnnoiseTypes";
+import rnnoiseWorkletModuleUrl from "./RNNoiseWorkletModule.ts?url";
+
+/**
+ * The number of samples per frame expected by RNNoise (at 48kHz = 10ms).
+ */
+const RNNOISE_SAMPLE_LENGTH = 480;
+/** The AudioContext sample rate, in Hz, that RNNoise processing requires. */
+const RNNOISE_REQUIRED_SAMPLE_RATE = 48000;
+/** Name under which the worklet processor is registered and instantiated. */
+const RNNOISE_WORKLET_NAME = "rnnoise-processor";
+const DEFAULT_RNNOISE_PRESET: RNNoiseSuppressionPreset = "conservative";
+// Stores the addModule() promise per AudioContext: pending while in-flight,
+// settled (resolved) once complete, absent on failure (cleared for retry).
+const workletRegistrations = new WeakMap<AudioContext, Promise<void>>();
+// Sample rates that have already been warned about, so each unsupported rate
+// is logged at most once.
+const warnedUnsupportedSampleRates = new Set<number>();
+
+// Structural view of globalThis used to feature-detect AudioWorklet.addModule
+// without assuming the DOM typings declare it.
+type RNNoiseSupportGlobal = typeof globalThis & {
+  AudioWorklet?: {
+    prototype?: {
+      addModule?: unknown;
+    };
+  };
+};
+
+/**
+ * Builds the error reported when an AudioContext runs at a rate other than
+ * RNNOISE_REQUIRED_SAMPLE_RATE.
+ *
+ * @param sampleRate - The rate, in Hz, of the rejected AudioContext.
+ * @returns An Error naming both the required and the received rate.
+ */
+function createUnsupportedSampleRateError(sampleRate: number): Error {
+  return new Error(
+    `RNNoise requires an AudioContext sample rate of ${RNNOISE_REQUIRED_SAMPLE_RATE}Hz (received ${sampleRate}Hz).`,
+  );
+}
+
+/**
+ * Logs a warning that RNNoise is being skipped for the given sample rate.
+ * Repeated calls with a rate that was already reported are silent.
+ *
+ * @param sampleRate - The unsupported AudioContext sample rate, in Hz.
+ */
+function warnUnsupportedSampleRate(sampleRate: number): void {
+  if (warnedUnsupportedSampleRates.has(sampleRate)) {
+    return;
+  }
+
+  warnedUnsupportedSampleRates.add(sampleRate);
+  logger.warn(
+    `Skipping RNNoise because AudioContext sample rate is ${sampleRate}Hz (expected ${RNNOISE_REQUIRED_SAMPLE_RATE}Hz).`,
+  );
+}
+
+/**
+ * Whether the current runtime supports the required APIs for RNNoise.
+ */
+export function supportsRNNoiseProcessor(): boolean {
+  const workletPrototype = (globalThis as RNNoiseSupportGlobal).AudioWorklet
+    ?.prototype;
+
+  return (
+    typeof AudioWorkletNode !== "undefined" &&
+    typeof MediaStreamAudioDestinationNode !== "undefined" &&
+    typeof MediaStreamAudioSourceNode !== "undefined" &&
+    typeof workletPrototype?.addModule === "function"
+  );
+}
+
+/**
+ * Generates the AudioWorklet processor code as a string, for use in tests.
+ *
+ * WARNING: This function is the **test harness** version of the worklet.
+ * The authoritative runtime implementation lives in `RNNoiseWorkletModule.ts`,
+ * which Vite compiles and loads as a separate script via the `?url` import.
+ * If the processor logic changes in `RNNoiseWorkletModule.ts` — frame size,
+ * ring buffer, preset constants, downmix algorithm, etc. — the generated
+ * code here **must be updated to match** or tests will diverge from runtime
+ * behaviour.
+ *
+ * The worklet loads the RNNoise WASM module synchronously (base64-inlined)
+ * and processes audio in 480-sample frames. A ring buffer bridges the
+ * 128-sample AudioWorklet blocks to the 480-sample RNNoise frames.
+ *
+ * @param rnnoiseModuleCode - Source text of rnnoise-sync.js; it is patched and
+ *   placed at the top of the generated script.
+ * @returns The complete worklet script, ending in a registerProcessor() call.
+ */
+function createWorkletCode(rnnoiseModuleCode: string): string {
+  // Patch the rnnoise-sync.js for AudioWorklet scope:
+  // 1. Replace import.meta.url — not available in classic worklet scripts
+  // 2. Remove the ES module export statement
+  const patched = rnnoiseModuleCode
+    .replace(/import\.meta\.url/g, '""')
+    .replace(/export\s+default\s+createRNNWasmModuleSync;?\s*$/m, "");
+
+  return `
+${patched}
+
+const FRAME_SIZE = ${RNNOISE_SAMPLE_LENGTH};
+const RING_SIZE = FRAME_SIZE * 3; // Enough headroom for buffering
+const SAMPLE_RATE = ${RNNOISE_REQUIRED_SAMPLE_RATE};
+const DEFAULT_PRESET = "${DEFAULT_RNNOISE_PRESET}";
+const PRESETS = {
+  conservative: {
+    maxAttenuationDb: 4,
+    openThreshold: 0.92,
+    closeThreshold: 0.60,
+    holdFrames: 12,
+    attenuateMs: 120,
+    releaseMs: 25,
+  },
+  balanced: {
+    maxAttenuationDb: 8,
+    openThreshold: 0.90,
+    closeThreshold: 0.55,
+    holdFrames: 10,
+    attenuateMs: 90,
+    releaseMs: 22,
+  },
+  strong: {
+    maxAttenuationDb: 16,
+    openThreshold: 0.90,
+    closeThreshold: 0.55,
+    holdFrames: 8,
+    attenuateMs: 55,
+    releaseMs: 18,
+  },
+};
+
+class RNNoiseWorkletProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this._ready = false;
+    this._destroyed = false;
+
+    // Ring buffers
+    this._inBuf = new Float32Array(RING_SIZE);
+    this._outBuf = new Float32Array(RING_SIZE);
+    this._inW = 0; // input write position
+    this._inR = 0; // input read position
+    this._outW = 0; // output write position
+    this._outR = 0; // output read position
+    this._currentGain = 1;
+    this._targetGain = 1;
+    this._holdFrames = 0;
+
+    this._setPreset(DEFAULT_PRESET);
+
+    this._initRNNoise();
+
+    this.port.onmessage = (event) => {
+      if (event.data.type === 'destroy') {
+        this._cleanup();
+      } else if (event.data.type === 'preset') {
+        this._setPreset(event.data.preset);
+      }
+    };
+  }
+
+  _smoothingStepFromMs(ms) {
+    if (ms <= 0) return 1;
+    const tau = ms / 1000;
+    return 1 - Math.exp(-1 / (SAMPLE_RATE * tau));
+  }
+
+  _setPreset(preset) {
+    // Own keys only: inherited names such as "toString" must not pick a config.
+    if (!Object.prototype.hasOwnProperty.call(PRESETS, preset)) return;
+    this._preset = preset;
+    const config = PRESETS[preset];
+    this._maxAttenuationDb = config.maxAttenuationDb;
+    this._openThreshold = config.openThreshold;
+    this._closeThreshold = config.closeThreshold;
+    this._holdFramesConfig = config.holdFrames;
+    this._attenuateStep = this._smoothingStepFromMs(config.attenuateMs);
+    this._releaseStep = this._smoothingStepFromMs(config.releaseMs);
+  }
+
+  _updateTargetGain(vadProbability) {
+    if (vadProbability >= this._openThreshold) {
+      this._holdFrames = this._holdFramesConfig;
+      this._targetGain = 1;
+      return;
+    }
+
+    if (this._holdFrames > 0) {
+      this._holdFrames -= 1;
+      this._targetGain = 1;
+      return;
+    }
+
+    const thresholdRange = this._openThreshold - this._closeThreshold;
+    const attenuationProgress = thresholdRange > 0
+      ? Math.max(
+          0,
+          Math.min(1, (this._openThreshold - vadProbability) / thresholdRange),
+        )
+      : 1;
+
+    const attenuationDb = attenuationProgress * this._maxAttenuationDb;
+    this._targetGain = Math.pow(10, -attenuationDb / 20);
+  }
+
+  _ringAvailable(w, r) {
+    let avail = w - r;
+    if (avail < 0) avail += RING_SIZE;
+    return avail;
+  }
+
+  _initRNNoise() {
+    try {
+      const module = createRNNWasmModuleSync();
+
+      // Allocate a buffer in WASM memory for one frame of float32 samples
+      const pcmBuf = module._malloc(FRAME_SIZE * 4);
+      module._rnnoise_init();
+      const state = module._rnnoise_create();
+
+      this._module = module;
+      this._pcmBuf = pcmBuf;
+      this._state = state;
+      this._heapF32 = module.HEAPF32;
+
+      this._ready = true;
+    } catch (e) {
+      // If RNNoise fails to initialize, audio will pass through unprocessed
+      this.port.postMessage({ type: 'error', message: String(e) });
+    }
+  }
+
+  _cleanup() {
+    if (this._module && this._state) {
+      this._module._rnnoise_destroy(this._state);
+      this._module._free(this._pcmBuf);
+      this._state = null;
+    }
+    this._destroyed = true;
+  }
+
+  _processRNNoiseFrame() {
+    const heapIdx = this._pcmBuf >> 2; // byte offset → float32 index
+
+    // Copy from input ring buffer to WASM heap, scaling to int16 range
+    for (let i = 0; i < FRAME_SIZE; i++) {
+      this._heapF32[heapIdx + i] =
+        this._inBuf[(this._inR + i) % RING_SIZE] * 32768.0;
+    }
+    this._inR = (this._inR + FRAME_SIZE) % RING_SIZE;
+
+    // Run RNNoise denoising (in-place)
+    const vadProbability = this._module._rnnoise_process_frame(
+      this._state, this._pcmBuf, this._pcmBuf
+    );
+    this._updateTargetGain(vadProbability);
+
+    // Copy from WASM heap to output ring buffer, scaling back to float range.
+    // Apply additional conservative attenuation between speech segments.
+    for (let i = 0; i < FRAME_SIZE; i++) {
+      const smoothingStep = this._targetGain < this._currentGain
+        ? this._attenuateStep
+        : this._releaseStep;
+      this._currentGain +=
+        (this._targetGain - this._currentGain) * smoothingStep;
+      this._outBuf[(this._outW + i) % RING_SIZE] =
+        (this._heapF32[heapIdx + i] / 32768.0) * this._currentGain;
+    }
+    this._outW = (this._outW + FRAME_SIZE) % RING_SIZE;
+  }
+
+  _mixInputChannels(inputChannels, sampleIndex, channelCount) {
+    // RNNoise is mono-only; average all channels for deterministic downmixing.
+    let mixed = 0;
+    for (let i = 0; i < channelCount; i++) {
+      const channel = inputChannels[i];
+      mixed += channel ? (channel[sampleIndex] ?? 0) : 0;
+    }
+    return mixed / channelCount;
+  }
+
+  process(inputs, outputs) {
+    if (this._destroyed) return false;
+
+    const inputChannels = inputs[0];
+    const output = outputs[0]?.[0];
+
+    if (!inputChannels?.length || !output) return true;
+
+    const blockSize = output.length;
+    const channelCount = inputChannels.length;
+
+    if (!this._ready) {
+      // Pass through until RNNoise is ready, with deterministic mono downmix.
+      for (let i = 0; i < blockSize; i++) {
+        output[i] = this._mixInputChannels(inputChannels, i, channelCount);
+      }
+      return true;
+    }
+
+    // Write input samples to the input ring buffer
+    for (let i = 0; i < blockSize; i++) {
+      this._inBuf[this._inW] = this._mixInputChannels(
+        inputChannels,
+        i,
+        channelCount,
+      );
+      this._inW = (this._inW + 1) % RING_SIZE;
+    }
+
+    // Process complete frames
+    while (this._ringAvailable(this._inW, this._inR) >= FRAME_SIZE) {
+      this._processRNNoiseFrame();
+    }
+
+    // Read from output ring buffer
+    const outAvail = this._ringAvailable(this._outW, this._outR);
+    const toRead = Math.min(blockSize, outAvail);
+
+    for (let i = 0; i < toRead; i++) {
+      output[i] = this._outBuf[this._outR];
+      this._outR = (this._outR + 1) % RING_SIZE;
+    }
+    // Fill remaining with silence (only during initial buffering)
+    for (let i = toRead; i < blockSize; i++) {
+      output[i] = 0;
+    }
+
+    return true;
+  }
+}
+
+registerProcessor('${RNNOISE_WORKLET_NAME}', RNNoiseWorkletProcessor);
+`;
+}
+
+/**
+ * Test-only entry point that returns the script built by createWorkletCode().
+ *
+ * @param rnnoiseModuleCode - Source text of rnnoise-sync.js to embed.
+ * @returns The generated worklet script as a string.
+ */
+export function createRNNoiseWorkletCodeForTesting(
+  rnnoiseModuleCode: string,
+): string {
+  return createWorkletCode(rnnoiseModuleCode);
+}
+
+/**
+ * A LiveKit TrackProcessor that applies RNNoise-based noise suppression
+ * to a local audio track via an AudioWorklet.
+ *
+ * The RNNoise WASM binary is lazy-loaded only when the processor is
+ * initialized, keeping the main bundle small.
+ */
+export class RNNoiseProcessor implements TrackProcessor<
+  Track.Kind.Audio,
+  AudioProcessorOptions
+> {
+  public name = "rnnoise-noise-suppression";
+  /** Output track of the RNNoise graph; undefined until init() builds it. */
+  public processedTrack?: MediaStreamTrack;
+
+  private sourceNode?: MediaStreamAudioSourceNode;
+  private workletNode?: AudioWorkletNode;
+  private destinationNode?: MediaStreamAudioDestinationNode;
+  private destroyed = false;
+  // Incremented by every init() just before it awaits worklet registration.
+  // An init() that resumes and finds a newer value has been superseded.
+  private initGeneration = 0;
+  private preset: RNNoiseSuppressionPreset;
+  private lastAudioContext?: AudioContext;
+
+  /**
+   * @param preset - Suppression preset sent to the worklet once it exists.
+   */
+  public constructor(
+    preset: RNNoiseSuppressionPreset = DEFAULT_RNNOISE_PRESET,
+  ) {
+    this.preset = preset;
+  }
+
+  /**
+   * Registers the RNNoise worklet module on `audioContext`, reusing the
+   * promise of an earlier or still-pending registration for that context.
+   * A failed registration is forgotten so that a later call retries.
+   */
+  private async ensureWorkletRegistered(audioContext: AudioContext): Promise<void> {
+    const existing = workletRegistrations.get(audioContext);
+    if (existing) return existing;
+
+    const pending = audioContext.audioWorklet.addModule(rnnoiseWorkletModuleUrl);
+    workletRegistrations.set(audioContext, pending);
+    // On failure, remove the entry so the next call can retry.
+    pending.catch(() => {
+      workletRegistrations.delete(audioContext);
+    });
+    return pending;
+  }
+
+  /**
+   * Builds the processing graph for `opts.track` and exposes its output as
+   * `processedTrack`.
+   *
+   * Resolves without building anything if destroy() or a newer init() ran
+   * while worklet registration was pending. Rejects if registration fails.
+   *
+   * @throws Error if `opts.audioContext` does not run at the required rate.
+   */
+  public async init(opts: AudioProcessorOptions): Promise<void> {
+    // If already initialized, tear down previous nodes before re-initializing
+    // so callers don't need to explicitly call destroy() first.
+    if (this.workletNode !== undefined) {
+      await this.destroy();
+    }
+    this.destroyed = false;
+    const { audioContext, track } = opts;
+
+    if (audioContext.sampleRate !== RNNOISE_REQUIRED_SAMPLE_RATE) {
+      warnUnsupportedSampleRate(audioContext.sampleRate);
+      throw createUnsupportedSampleRateError(audioContext.sampleRate);
+    }
+
+    const generation = ++this.initGeneration;
+    await this.ensureWorkletRegistered(audioContext);
+
+    // destroy() or a newer init() may have run while we awaited worklet
+    // registration. The destroyed flag alone misses destroy() followed by a
+    // second init(), which resets it; the generation check catches that case
+    // so a stale call cannot build a graph that nothing would disconnect.
+    if (this.destroyed || generation !== this.initGeneration) return;
+
+    // Build the audio processing graph:
+    // MediaStreamSource → AudioWorkletNode (RNNoise) → MediaStreamDestination
+    const sourceNode = audioContext.createMediaStreamSource(
+      new MediaStream([track]),
+    );
+    const workletNode = new AudioWorkletNode(
+      audioContext,
+      RNNOISE_WORKLET_NAME,
+      {
+        channelCount: 1,
+        channelCountMode: "explicit",
+      },
+    );
+    const destinationNode = audioContext.createMediaStreamDestination();
+
+    sourceNode.connect(workletNode);
+    workletNode.connect(destinationNode);
+
+    this.sourceNode = sourceNode;
+    this.workletNode = workletNode;
+    this.destinationNode = destinationNode;
+    this.workletNode.port.postMessage({ type: "preset", preset: this.preset });
+    this.processedTrack = destinationNode.stream.getAudioTracks()[0];
+    this.lastAudioContext = audioContext;
+  }
+
+  /**
+   * Destroys the current graph and initializes a new one from `opts`, falling
+   * back to the AudioContext of the last successful init() when `opts` has
+   * none.
+   *
+   * @throws Error if no AudioContext is available from either source.
+   */
+  public async restart(opts: AudioProcessorOptions): Promise<void> {
+    const audioContext = opts.audioContext ?? this.lastAudioContext;
+    if (!audioContext) {
+      throw new Error(
+        "RNNoise restart requires an AudioContext when no previous context has been initialized.",
+      );
+    }
+
+    await this.destroy();
+    await this.init({ ...opts, audioContext });
+  }
+
+  /**
+   * Tells the worklet to release its RNNoise state, disconnects the graph,
+   * stops the processed track and clears all node references. Calling it
+   * again while already destroyed does nothing.
+   */
+  public async destroy(): Promise<void> {
+    if (this.destroyed) {
+      await Promise.resolve();
+      return;
+    }
+    this.destroyed = true;
+
+    // Signal the worklet to clean up WASM resources
+    this.workletNode?.port.postMessage({ type: "destroy" });
+
+    // Disconnect the audio graph
+    this.sourceNode?.disconnect();
+    this.workletNode?.disconnect();
+    this.destinationNode?.disconnect();
+
+    try {
+      this.processedTrack?.stop();
+    } catch (e) {
+      logger.warn("Failed to stop RNNoise processed track during destroy", e);
+    }
+
+    this.sourceNode = undefined;
+    this.workletNode = undefined;
+    this.destinationNode = undefined;
+    this.processedTrack = undefined;
+    await Promise.resolve();
+  }
+
+  /**
+   * Remembers `preset` for future graphs and forwards it to the live worklet,
+   * if there is one.
+   */
+  public setPreset(preset: RNNoiseSuppressionPreset): void {
+    this.preset = preset;
+    this.workletNode?.port.postMessage({
+      type: "preset",
+      preset: this.preset,
+    });
+  }
+}
diff --git a/src/audio/RNNoiseWorkletModule.ts b/src/audio/RNNoiseWorkletModule.ts
new file mode 100644
index 0000000000..b551d7aa5b
--- /dev/null
+++ b/src/audio/RNNoiseWorkletModule.ts
@@ -0,0 +1,316 @@
+/*
+Copyright 2026 Element Creations Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+import createRNNWasmModuleSync from "@jitsi/rnnoise-wasm/dist/rnnoise-sync.js";
+
+import type { RNNoiseSuppressionPreset } from "./rnnoiseTypes";
+
+declare abstract class AudioWorkletProcessor {
+  protected constructor(options?: AudioWorkletNodeOptions);
+  public readonly port: MessagePort;
+}
+
+declare function registerProcessor(
+  name: string,
+  processorCtor: new (options?: AudioWorkletNodeOptions) => AudioWorkletProcessor,
+): void;
+
+// Mirrored by the test-harness copy in RNNoiseProcessor.ts; keep both in sync.
+const FRAME_SIZE = 480;
+const RING_SIZE = FRAME_SIZE * 3;
+const SAMPLE_RATE = 48000;
+const RNNOISE_WORKLET_NAME = "rnnoise-processor";
+const DEFAULT_PRESET: RNNoiseSuppressionPreset = "conservative";
+
+type PresetConfig = {
+  maxAttenuationDb: number;
+  openThreshold: number;
+  closeThreshold: number;
+  holdFrames: number;
+  attenuateMs: number;
+  releaseMs: number;
+};
+
+type RNNoiseModule = {
+  HEAPF32: Float32Array;
+  [key: string]: unknown;
+};
+
+type WorkletMessage =
+  | { type: "destroy" }
+  | { type: "preset"; preset: RNNoiseSuppressionPreset };
+
+const PRESETS: Record<RNNoiseSuppressionPreset, PresetConfig> = {
+  conservative: {
+    maxAttenuationDb: 4,
+    openThreshold: 0.92,
+    closeThreshold: 0.6,
+    holdFrames: 12,
+    attenuateMs: 120,
+    releaseMs: 25,
+  },
+  balanced: {
+    maxAttenuationDb: 8,
+    openThreshold: 0.9,
+    closeThreshold: 0.55,
+    holdFrames: 10,
+    attenuateMs: 90,
+    releaseMs: 22,
+  },
+  strong: {
+    maxAttenuationDb: 16,
+    openThreshold: 0.9,
+    closeThreshold: 0.55,
+    holdFrames: 8,
+    attenuateMs: 55,
+    releaseMs: 18,
+  },
+};
+
+/**
+ * Type guard for preset names. Only own keys of PRESETS qualify, so inherited
+ * names such as "toString" cannot select a non-existent configuration.
+ */
+function isPreset(
+  preset: unknown,
+): preset is keyof typeof PRESETS & RNNoiseSuppressionPreset {
+  return (
+    typeof preset === "string" &&
+    Object.prototype.hasOwnProperty.call(PRESETS, preset)
+  );
+}
+
+/** AudioWorklet processor that denoises a mono downmix with RNNoise. */
+class RNNoiseWorkletProcessor extends AudioWorkletProcessor {
+  private ready = false;
+  private destroyed = false;
+  private readonly inBuf = new Float32Array(RING_SIZE);
+  private readonly outBuf = new Float32Array(RING_SIZE);
+  private inW = 0;
+  private inR = 0;
+  private outW = 0;
+  private outR = 0;
+  private currentGain = 1;
+  private targetGain = 1;
+  private holdFrames = 0;
+  private maxAttenuationDb = PRESETS[DEFAULT_PRESET].maxAttenuationDb;
+  private openThreshold = PRESETS[DEFAULT_PRESET].openThreshold;
+  private closeThreshold = PRESETS[DEFAULT_PRESET].closeThreshold;
+  private holdFramesConfig = PRESETS[DEFAULT_PRESET].holdFrames;
+  private attenuateStep = 1;
+  private releaseStep = 1;
+  private module?: RNNoiseModule;
+  private pcmBuf?: number;
+  private state: number | null = null;
+  private heapF32?: Float32Array;
+
+  public constructor() {
+    super();
+
+    this.setPreset(DEFAULT_PRESET);
+    this.initRNNoise();
+
+    this.port.onmessage = (event: MessageEvent<WorkletMessage>): void => {
+      if (event.data.type === "destroy") {
+        this.cleanup();
+      } else if (
+        event.data.type === "preset" &&
+        isPreset(event.data.preset)
+      ) {
+        this.setPreset(event.data.preset);
+      }
+    };
+  }
+
+  private smoothingStepFromMs(ms: number): number {
+    if (ms <= 0) return 1;
+    const tau = ms / 1000;
+    return 1 - Math.exp(-1 / (SAMPLE_RATE * tau));
+  }
+
+  private setPreset(preset: RNNoiseSuppressionPreset): void {
+    if (!isPreset(preset)) return;
+
+    const config = PRESETS[preset];
+    this.maxAttenuationDb = config.maxAttenuationDb;
+    this.openThreshold = config.openThreshold;
+    this.closeThreshold = config.closeThreshold;
+    this.holdFramesConfig = config.holdFrames;
+    this.attenuateStep = this.smoothingStepFromMs(config.attenuateMs);
+    this.releaseStep = this.smoothingStepFromMs(config.releaseMs);
+  }
+
+  private updateTargetGain(vadProbability: number): void {
+    if (vadProbability >= this.openThreshold) {
+      this.holdFrames = this.holdFramesConfig;
+      this.targetGain = 1;
+      return;
+    }
+
+    if (this.holdFrames > 0) {
+      this.holdFrames -= 1;
+      this.targetGain = 1;
+      return;
+    }
+
+    const thresholdRange = this.openThreshold - this.closeThreshold;
+    const attenuationProgress =
+      thresholdRange > 0
+        ? Math.max(
+            0,
+            Math.min(1, (this.openThreshold - vadProbability) / thresholdRange),
+          )
+        : 1;
+
+    const attenuationDb = attenuationProgress * this.maxAttenuationDb;
+    this.targetGain = Math.pow(10, -attenuationDb / 20);
+  }
+
+  private ringAvailable(w: number, r: number): number {
+    let available = w - r;
+    if (available < 0) available += RING_SIZE;
+    return available;
+  }
+
+  private initRNNoise(): void {
+    try {
+      const module = createRNNWasmModuleSync() as unknown as RNNoiseModule;
+      const malloc = module["_malloc"] as (size: number) => number;
+      const rnnoiseInit = module["_rnnoise_init"] as () => void;
+      const rnnoiseCreate = module["_rnnoise_create"] as () => number;
+      const pcmBuf = malloc(FRAME_SIZE * 4);
+      rnnoiseInit();
+      const state = rnnoiseCreate();
+
+      this.module = module;
+      this.pcmBuf = pcmBuf;
+      this.state = state;
+      this.heapF32 = module.HEAPF32;
+      this.ready = true;
+    } catch (error) {
+      this.port.postMessage({ type: "error", message: String(error) });
+    }
+  }
+
+  private cleanup(): void {
+    if (
+      this.module &&
+      this.state !== null &&
+      this.pcmBuf !== undefined
+    ) {
+      const rnnoiseDestroy = this.module["_rnnoise_destroy"] as (
+        state: number,
+      ) => void;
+      const free = this.module["_free"] as (ptr: number) => void;
+      rnnoiseDestroy(this.state);
+      free(this.pcmBuf);
+      this.state = null;
+    }
+    this.destroyed = true;
+  }
+
+  private processRNNoiseFrame(): void {
+    if (
+      !this.module ||
+      this.state === null ||
+      this.pcmBuf === undefined ||
+      !this.heapF32
+    ) {
+      return;
+    }
+
+    const heapIdx = this.pcmBuf >> 2;
+
+    // Copy one frame into the WASM heap, scaled up by 32768 for RNNoise.
+    for (let i = 0; i < FRAME_SIZE; i++) {
+      this.heapF32[heapIdx + i] = this.inBuf[(this.inR + i) % RING_SIZE] * 32768;
+    }
+    this.inR = (this.inR + FRAME_SIZE) % RING_SIZE;
+
+    const rnnoiseProcessFrame = this.module["_rnnoise_process_frame"] as (
+      state: number,
+      input: number,
+      output: number,
+    ) => number;
+    const vadProbability = rnnoiseProcessFrame(
+      this.state,
+      this.pcmBuf,
+      this.pcmBuf,
+    );
+    this.updateTargetGain(vadProbability);
+
+    // Scale back down and apply the smoothed gain while copying out.
+    for (let i = 0; i < FRAME_SIZE; i++) {
+      const smoothingStep =
+        this.targetGain < this.currentGain
+          ? this.attenuateStep
+          : this.releaseStep;
+      this.currentGain += (this.targetGain - this.currentGain) * smoothingStep;
+      this.outBuf[(this.outW + i) % RING_SIZE] =
+        (this.heapF32[heapIdx + i] / 32768) * this.currentGain;
+    }
+    this.outW = (this.outW + FRAME_SIZE) % RING_SIZE;
+  }
+
+  private mixInputChannels(
+    inputChannels: Float32Array[],
+    sampleIndex: number,
+    channelCount: number,
+  ): number {
+    let mixed = 0;
+    for (let i = 0; i < channelCount; i++) {
+      const channel = inputChannels[i];
+      mixed += channel ? (channel[sampleIndex] ?? 0) : 0;
+    }
+    return mixed / channelCount;
+  }
+
+  public process(inputs: Float32Array[][], outputs: Float32Array[][]): boolean {
+    if (this.destroyed) return false;
+
+    const inputChannels = inputs[0];
+    const output = outputs[0]?.[0];
+
+    if (!inputChannels?.length || !output) return true;
+
+    const blockSize = output.length;
+    const channelCount = inputChannels.length;
+
+    // RNNoise not initialized: pass the mono downmix through unprocessed.
+    if (!this.ready) {
+      for (let i = 0; i < blockSize; i++) {
+        output[i] = this.mixInputChannels(inputChannels, i, channelCount);
+      }
+      return true;
+    }
+
+    for (let i = 0; i < blockSize; i++) {
+      this.inBuf[this.inW] = this.mixInputChannels(inputChannels, i, channelCount);
+      this.inW = (this.inW + 1) % RING_SIZE;
+    }
+
+    while (this.ringAvailable(this.inW, this.inR) >= FRAME_SIZE) {
+      this.processRNNoiseFrame();
+    }
+
+    const outAvailable = this.ringAvailable(this.outW, this.outR);
+    const toRead = Math.min(blockSize, outAvailable);
+
+    for (let i = 0; i < toRead; i++) {
+      output[i] = this.outBuf[this.outR];
+      this.outR = (this.outR + 1) % RING_SIZE;
+    }
+    // Fewer processed samples than requested: pad the block with silence.
+    for (let i = toRead; i < blockSize; i++) {
+      output[i] = 0;
+    }
+
+    return true;
+  }
+}
+
+registerProcessor(RNNOISE_WORKLET_NAME, RNNoiseWorkletProcessor);
diff --git a/src/audio/noiseSuppressionPolicy.test.ts b/src/audio/noiseSuppressionPolicy.test.ts
new file mode 100644
index 0000000000..5ad2deaf3a
--- /dev/null
+++ b/src/audio/noiseSuppressionPolicy.test.ts
@@ -0,0 +1,49 @@
+/*
+Copyright 2026 Element Creations Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+import { describe, expect, it } from "vitest";
+
+import { shouldEnableNativeNoiseSuppression } from "./noiseSuppressionPolicy";
+
+describe("shouldEnableNativeNoiseSuppression", () => {
+  it("keeps native suppression enabled when RNNoise is enabled but unsupported", () => {
+    expect(
+      shouldEnableNativeNoiseSuppression({
+        urlNoiseSuppression: true,
+        rnnoiseEnabled: true,
+        rnnoiseSupported: false,
+      }),
+    ).toBe(true);
+  });
+
+  it("disables native suppression only when RNNoise is enabled and supported", () => {
+    expect(
+      shouldEnableNativeNoiseSuppression({
+        urlNoiseSuppression: true,
+        rnnoiseEnabled: true,
+        rnnoiseSupported: true,
+      }),
+    ).toBe(false);
+  });
+
+  it("keeps native suppression disabled when explicitly disabled by URL", () => {
+    expect(
+      shouldEnableNativeNoiseSuppression({
+        urlNoiseSuppression: false,
+        rnnoiseEnabled: false,
+        rnnoiseSupported: false,
+      }),
+    ).toBe(false);
+    expect(
+      shouldEnableNativeNoiseSuppression({
+        urlNoiseSuppression: false,
+        rnnoiseEnabled: true,
+        rnnoiseSupported: false,
+      }),
+    ).toBe(false);
+  });
+});
diff --git a/src/audio/noiseSuppressionPolicy.ts b/src/audio/noiseSuppressionPolicy.ts
new file mode 100644
index 0000000000..93ec150be0
--- /dev/null
+++ b/src/audio/noiseSuppressionPolicy.ts
@@ -0,0 +1,26 @@
+/*
+Copyright 2026 Element Creations Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+interface NativeNoiseSuppressionPolicyOptions {
+  urlNoiseSuppression: boolean | undefined;
+  rnnoiseEnabled: boolean;
+  rnnoiseSupported: boolean;
+}
+
+/**
+ * Decides whether the browser's built-in noise suppression is requested:
+ * the URL parameter must allow it (unset counts as allowed) and RNNoise must
+ * not be taking over, i.e. not both enabled and supported at runtime.
+ */
+export function shouldEnableNativeNoiseSuppression(
+  options: NativeNoiseSuppressionPolicyOptions,
+): boolean {
+  const { urlNoiseSuppression, rnnoiseEnabled, rnnoiseSupported } = options;
+  const rnnoiseActive = rnnoiseEnabled && rnnoiseSupported;
+  const nativeAllowed = urlNoiseSuppression ?? true;
+  return nativeAllowed && !rnnoiseActive;
+}
diff --git a/src/audio/rnnoiseTypes.ts b/src/audio/rnnoiseTypes.ts
new file mode 100644
index 0000000000..6d6d964f00
--- /dev/null
+++ b/src/audio/rnnoiseTypes.ts
@@ -0,0 +1,15 @@
+/*
+Copyright 2026 Element Creations Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/
+
+export const rnnoiseSuppressionPresets = [
+  "conservative",
+  "balanced",
+  "strong",
+] as const;
+
+export type RNNoiseSuppressionPreset =
+  (typeof rnnoiseSuppressionPresets)[number];
diff --git a/src/settings/SettingsModal.test.tsx b/src/settings/SettingsModal.test.tsx
new file mode 100644
index 0000000000..c4a93e437e
--- /dev/null
+++ b/src/settings/SettingsModal.test.tsx
@@ -0,0 +1,189 @@
+/*
+Copyright 2026 Element Creations Ltd.
+
+SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
+Please see LICENSE in the repository root for full details.
+*/ + +import { afterEach, describe, beforeEach, expect, it, vi } from "vitest"; +import { render, screen } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { TooltipProvider } from "@vector-im/compound-web"; + +import type { MatrixClient } from "matrix-js-sdk"; +import type { ReactNode } from "react"; +import { SettingsModal } from "./SettingsModal"; +import { + rnnoiseNoiseSuppression, + rnnoiseNoiseSuppressionPreset, +} from "./settings"; +import { supportsRNNoiseProcessor } from "../audio/RNNoiseProcessor"; + +const { mockRequestDeviceNames } = vi.hoisted(() => ({ + mockRequestDeviceNames: vi.fn(), +})); + +vi.mock("../audio/RNNoiseProcessor", async () => { + const actual = await vi.importActual("../audio/RNNoiseProcessor"); + + return { + ...actual, + supportsRNNoiseProcessor: vi.fn(() => true), + }; +}); + +vi.mock("../Modal", () => ({ + Modal: ({ + open, + children, + }: { + open: boolean; + children: ReactNode; + }): ReactNode => (open ?
{children}
: null), +})); + +vi.mock("../tabs/Tabs", () => ({ + TabContainer: ({ + tab, + tabs, + }: { + tab: string; + tabs: { key: string; content: ReactNode }[]; + }): ReactNode => ( +
{tabs.find((candidate) => candidate.key === tab)?.content}
+ ), +})); + +vi.mock("../MediaDevicesContext", () => ({ + useMediaDevices: (): { + requestDeviceNames: typeof mockRequestDeviceNames; + audioInput: object; + audioOutput: object; + videoInput: object; + } => ({ + requestDeviceNames: mockRequestDeviceNames, + audioInput: {}, + audioOutput: {}, + videoInput: {}, + }), +})); + +vi.mock("./DeviceSelection", () => ({ + DeviceSelection: (): ReactNode =>
, +})); + +vi.mock("../livekit/TrackProcessorContext", () => ({ + useTrackProcessor: (): { supported: boolean; processor: undefined } => ({ + supported: true, + processor: undefined, + }), +})); + +vi.mock("./submit-rageshake", () => ({ + useSubmitRageshake: (): { + submitRageshake: ReturnType; + sending: boolean; + sent: boolean; + error: undefined; + available: boolean; + } => ({ + submitRageshake: vi.fn(), + sending: false, + sent: false, + error: undefined, + available: false, + }), +})); + +vi.mock("../UrlParams", async () => { + const actual = await vi.importActual("../UrlParams"); + return { + ...actual, + useUrlParams: (): { controlledAudioDevices: boolean } => ({ + controlledAudioDevices: false, + }), + }; +}); + +function renderSettingsModal(): void { + render( + + + , + ); +} + +describe("SettingsModal RNNoise controls", () => { + beforeEach(() => { + vi.stubGlobal( + "ResizeObserver", + class ResizeObserver { + public observe(): void {} + public unobserve(): void {} + public disconnect(): void {} + }, + ); + localStorage.clear(); + mockRequestDeviceNames.mockClear(); + rnnoiseNoiseSuppressionPreset.setValue("conservative"); + rnnoiseNoiseSuppression.setValue(false); + vi.mocked(supportsRNNoiseProcessor).mockReturnValue(true); + }); + + afterEach(() => { + vi.unstubAllGlobals(); + }); + + it("renders the RNNoise checkbox in the audio tab", () => { + renderSettingsModal(); + + expect( + screen.getByLabelText("Enable enhanced noise suppression (RNNoise)"), + ).toBeInTheDocument(); + expect(mockRequestDeviceNames).toHaveBeenCalledOnce(); + }); + + it("disables RNNoise when AudioWorklet support is unavailable", () => { + vi.mocked(supportsRNNoiseProcessor).mockReturnValue(false); + rnnoiseNoiseSuppression.setValue(true); + + renderSettingsModal(); + + const checkbox = screen.getByLabelText( + "Enable enhanced noise suppression (RNNoise)", + ); + expect(checkbox).toBeDisabled(); + expect(checkbox).not.toBeChecked(); + expect( + screen.getByText( + "(Enhanced 
noise suppression is not supported by this browser.)", + ), + ).toBeInTheDocument(); + expect( + screen.queryByText( + "Pick a suppression profile. Stronger modes remove more keyboard noise but can sound more processed.", + ), + ).not.toBeInTheDocument(); + }); + + it("persists RNNoise setting when toggled", async () => { + const user = userEvent.setup(); + renderSettingsModal(); + + const checkbox = screen.getByLabelText( + "Enable enhanced noise suppression (RNNoise)", + ); + await user.click(checkbox); + + expect(rnnoiseNoiseSuppression.getValue()).toBe(true); + expect( + localStorage.getItem("matrix-setting-rnnoise-noise-suppression"), + ).toBe("true"); + }); +}); diff --git a/src/settings/SettingsModal.tsx b/src/settings/SettingsModal.tsx index 2b4078aa50..659ec8f55f 100644 --- a/src/settings/SettingsModal.tsx +++ b/src/settings/SettingsModal.tsx @@ -5,10 +5,24 @@ SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial Please see LICENSE in the repository root for full details. 
*/ -import { type FC, type ReactNode, useEffect, useState } from "react"; +import { + type ChangeEvent, + type FC, + type ReactNode, + useEffect, + useId, + useState, +} from "react"; import { useTranslation } from "react-i18next"; import { type MatrixClient } from "matrix-js-sdk"; -import { Button, Root as Form, Separator } from "@vector-im/compound-web"; +import { + Button, + InlineField, + Label, + RadioControl, + Root as Form, + Separator, +} from "@vector-im/compound-web"; import { type Room as LivekitRoom } from "livekit-client"; import { Modal } from "../Modal"; @@ -24,6 +38,8 @@ import { soundEffectVolume as soundEffectVolumeSetting, backgroundBlur as backgroundBlurSetting, developerMode, + rnnoiseNoiseSuppression as rnnoiseNoiseSuppressionSetting, + rnnoiseNoiseSuppressionPreset as rnnoiseNoiseSuppressionPresetSetting, } from "./settings"; import { PreferencesSettingsTab } from "./PreferencesSettingsTab"; import { Slider } from "../Slider"; @@ -34,6 +50,11 @@ import { FieldRow, InputField } from "../input/Input"; import { useSubmitRageshake } from "./submit-rageshake"; import { useUrlParams } from "../UrlParams"; import { useBehavior } from "../useBehavior"; +import { supportsRNNoiseProcessor } from "../audio/RNNoiseProcessor"; +import { + type RNNoiseSuppressionPreset, + rnnoiseSuppressionPresets, +} from "../audio/rnnoiseTypes"; type SettingsTab = | "audio" @@ -98,6 +119,68 @@ export const SettingsModal: FC = ({ ); }; + const RNNoiseCheckbox: React.FC = (): ReactNode => { + const supported = supportsRNNoiseProcessor(); + const [rnnoiseEnabled, setRnnoiseEnabled] = useSetting( + rnnoiseNoiseSuppressionSetting, + ); + const [rnnoisePreset, setRnnoisePreset] = useSetting( + rnnoiseNoiseSuppressionPresetSetting, + ); + const rnnoisePresetGroup = useId(); + + const onPresetChange = (e: ChangeEvent): void => { + setRnnoisePreset(e.target.value as RNNoiseSuppressionPreset); + }; + + const presetLabelByPreset: Record = { + conservative: 
t("settings.audio_tab.rnnoise_preset_conservative"), + balanced: t("settings.audio_tab.rnnoise_preset_balanced"), + strong: t("settings.audio_tab.rnnoise_preset_strong"), + }; + const effectiveRnnoiseEnabled = supported && !!rnnoiseEnabled; + + return ( + <> +

{t("settings.audio_tab.rnnoise_header")}

+ + setRnnoiseEnabled(e.target.checked)} + disabled={!supported} + /> + + {effectiveRnnoiseEnabled && ( + <> +

{t("settings.audio_tab.rnnoise_preset_description")}

+ {rnnoiseSuppressionPresets.map((preset) => ( + + } + > + + + ))} + + )} + + ); + }; + const devices = useMediaDevices(); useEffect(() => { if (open) devices.requestDeviceNames(); @@ -164,6 +247,8 @@ export const SettingsModal: FC = ({ step={0.01} />
+ + ), diff --git a/src/settings/settings.ts b/src/settings/settings.ts index a674f1aae0..0993a15116 100644 --- a/src/settings/settings.ts +++ b/src/settings/settings.ts @@ -9,6 +9,7 @@ import { logger } from "matrix-js-sdk/lib/logger"; import { BehaviorSubject } from "rxjs"; import { PosthogAnalytics } from "../analytics/PosthogAnalytics"; +import type { RNNoiseSuppressionPreset } from "../audio/rnnoiseTypes"; import { type Behavior } from "../state/Behavior"; import { useBehavior } from "../useBehavior"; @@ -97,6 +98,15 @@ export const videoInput = new Setting( ); export const backgroundBlur = new Setting("background-blur", false); +export const rnnoiseNoiseSuppression = new Setting( + "rnnoise-noise-suppression", + false, +); +export const rnnoiseNoiseSuppressionPreset = + new Setting( + "rnnoise-noise-suppression-preset", + "conservative", + ); export const showHandRaisedTimer = new Setting( "hand-raised-show-timer", diff --git a/src/state/CallViewModel/CallViewModel.ts b/src/state/CallViewModel/CallViewModel.ts index c19c4818dc..9e9c4ef460 100644 --- a/src/state/CallViewModel/CallViewModel.ts +++ b/src/state/CallViewModel/CallViewModel.ts @@ -61,8 +61,11 @@ import { duplicateTiles, MatrixRTCMode, playReactionsSound, + rnnoiseNoiseSuppression, showReactions, } from "../../settings/settings"; +import { shouldEnableNativeNoiseSuppression } from "../../audio/noiseSuppressionPolicy"; +import { supportsRNNoiseProcessor } from "../../audio/RNNoiseProcessor"; import { isFirefox } from "../../Platform"; import { setPipEnabled$ } from "../../controls"; import { TileStore } from "../TileStore"; @@ -480,7 +483,11 @@ export function createCallViewModel$( getUrlParams().controlledAudioDevices, options.livekitRoomFactory, getUrlParams().echoCancellation, - getUrlParams().noiseSuppression, + shouldEnableNativeNoiseSuppression({ + urlNoiseSuppression: getUrlParams().noiseSuppression, + rnnoiseEnabled: rnnoiseNoiseSuppression.getValue(), + rnnoiseSupported: 
supportsRNNoiseProcessor(), + }), ); const connectionManager = createConnectionManager$({ diff --git a/src/state/CallViewModel/localMember/Publisher.test.ts b/src/state/CallViewModel/localMember/Publisher.test.ts index a0eaa2fd67..b69fdc0030 100644 --- a/src/state/CallViewModel/localMember/Publisher.test.ts +++ b/src/state/CallViewModel/localMember/Publisher.test.ts @@ -27,6 +27,11 @@ import { import { Publisher } from "./Publisher"; import { type Connection } from "../remoteMembers/Connection"; import { type MuteStates } from "../../MuteStates"; +import { + rnnoiseNoiseSuppression, + rnnoiseNoiseSuppressionPreset, +} from "../../../settings/settings"; +import type { RNNoiseProcessor } from "../../../audio/RNNoiseProcessor"; let scope: ObservableScope; @@ -37,8 +42,12 @@ beforeEach(() => { afterEach(() => scope.end()); function createMockLocalTrack(source: Track.Source): LocalTrack { + let processor: { name: string } | undefined; + const kind = + source === Track.Source.Microphone ? Track.Kind.Audio : Track.Kind.Video; const track = { source, + kind, isMuted: false, isUpstreamPaused: false, } as Partial as LocalTrack; @@ -57,6 +66,16 @@ function createMockLocalTrack(source: Track.Source): LocalTrack { // @ts-expect-error - for that test we want to set isUpstreamPaused directly track.isUpstreamPaused = false; }); + vi.mocked(track).getProcessor = vi.fn().mockImplementation(() => processor); + vi.mocked(track).setProcessor = vi + .fn() + .mockImplementation((nextProcessor) => { + processor = nextProcessor as { name: string }; + }); + vi.mocked(track).stopProcessor = vi.fn().mockImplementation(() => { + processor = undefined; + }); + vi.mocked(track).restartTrack = vi.fn().mockResolvedValue(undefined); return track; } @@ -96,6 +115,8 @@ let trackPublications: LocalTrackPublication[]; let createTrackLock: Promise; beforeEach(() => { + rnnoiseNoiseSuppression.setValue(false); + rnnoiseNoiseSuppressionPreset.setValue("conservative"); trackPublications = []; audioEnabled$ 
= new BehaviorSubject(false); videoEnabled$ = new BehaviorSubject(false); @@ -339,6 +360,282 @@ describe("Publisher", () => { }); it("does mute unmute audio", async () => {}); + + describe("RNNoise", () => { + beforeEach(() => { + vi.stubGlobal("AudioWorkletNode", class AudioWorkletNode {}); + vi.stubGlobal( + "AudioWorklet", + class AudioWorklet { + public async addModule(): Promise { + await Promise.resolve(); + } + }, + ); + vi.stubGlobal( + "MediaStreamAudioDestinationNode", + class MediaStreamAudioDestinationNode {}, + ); + vi.stubGlobal( + "MediaStreamAudioSourceNode", + class MediaStreamAudioSourceNode {}, + ); + }); + + afterEach(() => { + vi.unstubAllGlobals(); + rnnoiseNoiseSuppression.setValue(false); + rnnoiseNoiseSuppressionPreset.setValue("conservative"); + }); + + it("enabling setting applies RNNoise processor on microphone track", async () => { + const micTrack = createMockLocalTrack( + Track.Source.Microphone, + ) as LocalTrack & { setProcessor: (...args: unknown[]) => void }; + trackPublications.push({ + source: Track.Source.Microphone, + track: micTrack, + audioTrack: micTrack, + } as unknown as LocalTrackPublication); + localParticipant.emit( + ParticipantEvent.LocalTrackPublished, + trackPublications[0], + ); + + rnnoiseNoiseSuppression.setValue(true); + await flushPromises(); + + expect(micTrack.setProcessor).toHaveBeenCalledOnce(); + }); + + it("disabling setting removes RNNoise processor on microphone track", async () => { + const micTrack = createMockLocalTrack( + Track.Source.Microphone, + ) as LocalTrack & { + setProcessor: (...args: unknown[]) => void; + stopProcessor: () => void; + }; + trackPublications.push({ + source: Track.Source.Microphone, + track: micTrack, + audioTrack: micTrack, + } as unknown as LocalTrackPublication); + localParticipant.emit( + ParticipantEvent.LocalTrackPublished, + trackPublications[0], + ); + + rnnoiseNoiseSuppression.setValue(true); + await flushPromises(); + rnnoiseNoiseSuppression.setValue(false); + 
await flushPromises(); + + expect(micTrack.setProcessor).toHaveBeenCalledOnce(); + expect(micTrack.stopProcessor).toHaveBeenCalledOnce(); + }); + + it("auto-disables RNNoise when processor setup fails and falls back to native noise suppression", async () => { + const micTrack = createMockLocalTrack( + Track.Source.Microphone, + ) as LocalTrack & { + setProcessor: (...args: unknown[]) => Promise; + restartTrack: (...args: unknown[]) => Promise; + }; + const processorError = new Error("RNNoise setup failed"); + vi.mocked(micTrack.setProcessor).mockRejectedValueOnce(processorError); + trackPublications.push({ + source: Track.Source.Microphone, + track: micTrack, + audioTrack: micTrack, + } as unknown as LocalTrackPublication); + localParticipant.emit( + ParticipantEvent.LocalTrackPublished, + trackPublications[0], + ); + + rnnoiseNoiseSuppression.setValue(true); + + // The operation queue serializes restart/sync jobs. Flush enough microtasks + // to allow the fallback toggle and restart to complete. 
+ for (let i = 0; i < 5; i++) { + await flushPromises(); + } + + expect(micTrack.setProcessor).toHaveBeenCalledOnce(); + expect(rnnoiseNoiseSuppression.getValue()).toBe(false); + expect(micTrack.restartTrack).toHaveBeenCalled(); + const restartCalls = vi.mocked(micTrack.restartTrack).mock.calls; + const finalRestartConfig = restartCalls[restartCalls.length - 1]?.[0] as { + noiseSuppression?: boolean; + }; + expect(finalRestartConfig).toEqual( + expect.objectContaining({ noiseSuppression: true }), + ); + }); + + it("restarts microphone track with native noise suppression disabled when RNNoise is enabled", async () => { + const micTrack = createMockLocalTrack( + Track.Source.Microphone, + ) as LocalTrack & { restartTrack: (...args: unknown[]) => void }; + trackPublications.push({ + source: Track.Source.Microphone, + track: micTrack, + audioTrack: micTrack, + } as unknown as LocalTrackPublication); + localParticipant.emit( + ParticipantEvent.LocalTrackPublished, + trackPublications[0], + ); + + rnnoiseNoiseSuppression.setValue(true); + await flushPromises(); + + expect(micTrack.restartTrack).toHaveBeenCalledWith( + expect.objectContaining({ + noiseSuppression: false, + }), + ); + }); + + it("restarts a newly-created publisher microphone track with native suppression disabled when RNNoise is already enabled", async () => { + await publisher.destroy(); + rnnoiseNoiseSuppression.setValue(true); + + const freshPublisher = new Publisher( + connection, + mockMediaDevices({}), + muteStates, + constant({ supported: false, processor: undefined }), + logger, + ); + const micTrack = createMockLocalTrack( + Track.Source.Microphone, + ) as LocalTrack & { + restartTrack: (...args: unknown[]) => void; + setProcessor: (...args: unknown[]) => void; + }; + trackPublications.push({ + source: Track.Source.Microphone, + track: micTrack, + audioTrack: micTrack, + } as unknown as LocalTrackPublication); + localParticipant.emit( + ParticipantEvent.LocalTrackPublished, + trackPublications[0], 
+ ); + + try { + await flushPromises(); + + expect(micTrack.restartTrack).toHaveBeenCalledWith( + expect.objectContaining({ + noiseSuppression: false, + }), + ); + expect(micTrack.setProcessor).toHaveBeenCalledOnce(); + expect( + vi.mocked(micTrack.restartTrack).mock.invocationCallOrder[0], + ).toBeLessThan( + vi.mocked(micTrack.setProcessor).mock.invocationCallOrder[0], + ); + } finally { + await freshPublisher.destroy(); + } + }); + + it("keeps native noise suppression enabled and skips processor when RNNoise is unsupported", async () => { + vi.stubGlobal("AudioWorkletNode", undefined); + vi.stubGlobal("AudioWorklet", undefined); + vi.stubGlobal("MediaStreamAudioDestinationNode", undefined); + vi.stubGlobal("MediaStreamAudioSourceNode", undefined); + const micTrack = createMockLocalTrack( + Track.Source.Microphone, + ) as LocalTrack & { + restartTrack: (...args: unknown[]) => void; + setProcessor: (...args: unknown[]) => void; + }; + trackPublications.push({ + source: Track.Source.Microphone, + track: micTrack, + audioTrack: micTrack, + } as unknown as LocalTrackPublication); + localParticipant.emit( + ParticipantEvent.LocalTrackPublished, + trackPublications[0], + ); + + rnnoiseNoiseSuppression.setValue(true); + await flushPromises(); + + expect(micTrack.setProcessor).not.toHaveBeenCalled(); + expect(micTrack.restartTrack).toHaveBeenCalledWith( + expect.objectContaining({ + noiseSuppression: true, + }), + ); + }); + + it("stops RNNoise processor before restarting microphone track when disabling RNNoise", async () => { + const micTrack = createMockLocalTrack( + Track.Source.Microphone, + ) as LocalTrack & { + stopProcessor: () => void; + restartTrack: (...args: unknown[]) => void; + }; + trackPublications.push({ + source: Track.Source.Microphone, + track: micTrack, + audioTrack: micTrack, + } as unknown as LocalTrackPublication); + localParticipant.emit( + ParticipantEvent.LocalTrackPublished, + trackPublications[0], + ); + + 
rnnoiseNoiseSuppression.setValue(true); + await flushPromises(); + + vi.mocked(micTrack.stopProcessor).mockClear(); + vi.mocked(micTrack.restartTrack).mockClear(); + + rnnoiseNoiseSuppression.setValue(false); + await flushPromises(); + + expect(micTrack.stopProcessor).toHaveBeenCalledOnce(); + expect(micTrack.restartTrack).toHaveBeenCalledOnce(); + expect( + vi.mocked(micTrack.stopProcessor).mock.invocationCallOrder[0], + ).toBeLessThan( + vi.mocked(micTrack.restartTrack).mock.invocationCallOrder[0], + ); + }); + + it("updates active RNNoise processor preset when preset setting changes", async () => { + const micTrack = createMockLocalTrack( + Track.Source.Microphone, + ) as LocalTrack & { getProcessor: () => unknown }; + trackPublications.push({ + source: Track.Source.Microphone, + track: micTrack, + audioTrack: micTrack, + } as unknown as LocalTrackPublication); + localParticipant.emit( + ParticipantEvent.LocalTrackPublished, + trackPublications[0], + ); + + rnnoiseNoiseSuppression.setValue(true); + await flushPromises(); + + const processor = micTrack.getProcessor() as RNNoiseProcessor; + const setPresetSpy = vi.spyOn(processor, "setPreset"); + + rnnoiseNoiseSuppressionPreset.setValue("strong"); + await flushPromises(); + + expect(setPresetSpy).toHaveBeenCalledWith("strong"); + }); + }); }); describe("Bug fix", () => { diff --git a/src/state/CallViewModel/localMember/Publisher.ts b/src/state/CallViewModel/localMember/Publisher.ts index b7841c498b..d4425c04e4 100644 --- a/src/state/CallViewModel/localMember/Publisher.ts +++ b/src/state/CallViewModel/localMember/Publisher.ts @@ -7,6 +7,7 @@ Please see LICENSE in the repository root for full details. 
*/ import { ConnectionState as LivekitConnectionState, + type LocalAudioTrack, type LocalTrackPublication, LocalVideoTrack, ParticipantEvent, @@ -14,9 +15,12 @@ import { Track, } from "livekit-client"; import { + combineLatest, + distinctUntilChanged, map, NEVER, type Observable, + skip, type Subscription, switchMap, } from "rxjs"; @@ -33,6 +37,16 @@ import { getUrlParams } from "../../../UrlParams.ts"; import { observeTrackReference$ } from "../../observeTrackReference"; import { type Connection } from "../remoteMembers/Connection.ts"; import { ObservableScope } from "../../ObservableScope.ts"; +import { + RNNoiseProcessor, + supportsRNNoiseProcessor, +} from "../../../audio/RNNoiseProcessor.ts"; +import { shouldEnableNativeNoiseSuppression } from "../../../audio/noiseSuppressionPolicy.ts"; +import { + rnnoiseNoiseSuppression, + rnnoiseNoiseSuppressionPreset, +} from "../../../settings/settings.ts"; +import type { RNNoiseSuppressionPreset } from "../../../audio/rnnoiseTypes.ts"; /** * A wrapper for a Connection object. @@ -48,6 +62,8 @@ export class Publisher { public shouldPublish = false; private readonly scope = new ObservableScope(); + private rnnoiseOperationQueue: Promise = Promise.resolve(); + private rnnoisePolicySyncedTrack: LocalAudioTrack | null = null; /** * Creates a new Publisher. 
@@ -73,6 +89,8 @@ export class Publisher { // Setup track processor syncing (blur) this.observeTrackProcessors(this.scope, room, trackerProcessorState$); + this.observeRNNoiseProcessor(this.scope, room, devices); + this.observeRNNoiseSettingRestart(this.scope, room, devices); // Observe media device changes and update LiveKit active devices accordingly this.observeMediaDevices(this.scope, devices, controlledAudioDevices); @@ -416,4 +434,161 @@ export class Publisher { ); trackProcessorSync(scope, track$, trackerProcessorState$); } + + private observeRNNoiseProcessor( + scope: ObservableScope, + room: LivekitRoom, + devices: MediaDevices, + ): void { + const microphoneTrack$ = scope.behavior( + observeTrackReference$( + room.localParticipant, + Track.Source.Microphone, + ).pipe( + map((trackRef) => { + const track = trackRef?.publication.track; + return track?.kind === Track.Kind.Audio + ? (track as LocalAudioTrack) + : null; + }), + ), + null, + ); + + combineLatest([ + microphoneTrack$, + rnnoiseNoiseSuppression.value$, + rnnoiseNoiseSuppressionPreset.value$, + ]) + .pipe( + scope.bind(), + distinctUntilChanged( + ([aTrack, _aEnabled, aPreset], [bTrack, _bEnabled, bPreset]) => { + return aTrack === bTrack && aPreset === bPreset; + }, + ), + ) + .subscribe(([microphoneTrack, rnnoiseEnabled, rnnoisePreset]) => { + const rnnoiseSupported = supportsRNNoiseProcessor(); + if (!microphoneTrack || !rnnoiseSupported) { + this.rnnoisePolicySyncedTrack = microphoneTrack; + return; + } + + const isNewMicrophoneTrack = + microphoneTrack !== this.rnnoisePolicySyncedTrack; + this.rnnoisePolicySyncedTrack = microphoneTrack; + + this.enqueueRNNoiseOperation(async () => { + if (rnnoiseEnabled && isNewMicrophoneTrack) { + await this.restartMicrophoneTrackForNoiseSuppressionPolicy( + microphoneTrack, + devices, + rnnoiseEnabled, + ); + } + await this.syncRNNoiseProcessor( + microphoneTrack, + rnnoiseEnabled, + rnnoisePreset, + ); + }); + }); + } + + private 
observeRNNoiseSettingRestart( + scope: ObservableScope, + room: LivekitRoom, + devices: MediaDevices, + ): void { + rnnoiseNoiseSuppression.value$ + .pipe(scope.bind(), distinctUntilChanged(), skip(1)) + .subscribe((rnnoiseEnabled) => { + const audioTrack = room.localParticipant.getTrackPublication( + Track.Source.Microphone, + )?.audioTrack; + if (!audioTrack) return; + + const rnnoiseSupported = supportsRNNoiseProcessor(); + this.enqueueRNNoiseOperation(async () => { + await this.restartMicrophoneTrackForNoiseSuppressionPolicy( + audioTrack, + devices, + rnnoiseEnabled, + ); + await this.syncRNNoiseProcessor( + audioTrack, + rnnoiseEnabled && rnnoiseSupported, + rnnoiseNoiseSuppressionPreset.getValue(), + ); + }); + }); + } + + private enqueueRNNoiseOperation(operation: () => Promise): void { + this.rnnoiseOperationQueue = this.rnnoiseOperationQueue.then(async () => { + try { + await operation(); + } catch (e) { + this.logger.error("Failed to process RNNoise operation", e); + } + }); + } + + private async restartMicrophoneTrackForNoiseSuppressionPolicy( + audioTrack: LocalAudioTrack, + devices: MediaDevices, + rnnoiseEnabled: boolean, + ): Promise { + const activeProcessor = audioTrack.getProcessor(); + if (activeProcessor?.name === "rnnoise-noise-suppression") { + await audioTrack.stopProcessor(); + } + + const { echoCancellation = true, noiseSuppression = true } = getUrlParams(); + await audioTrack.restartTrack({ + deviceId: devices.audioInput.selected$.value?.id, + echoCancellation, + noiseSuppression: shouldEnableNativeNoiseSuppression({ + urlNoiseSuppression: noiseSuppression, + rnnoiseEnabled, + rnnoiseSupported: supportsRNNoiseProcessor(), + }), + }); + } + + private async syncRNNoiseProcessor( + microphoneTrack: LocalAudioTrack, + rnnoiseEnabled: boolean, + rnnoisePreset: RNNoiseSuppressionPreset, + ): Promise { + try { + const processor = microphoneTrack.getProcessor(); + const rnnoiseActive = processor?.name === "rnnoise-noise-suppression"; + const 
rnnoiseProcessor = + processor instanceof RNNoiseProcessor ? processor : undefined; + + if (rnnoiseEnabled) { + if (rnnoiseProcessor) { + rnnoiseProcessor.setPreset(rnnoisePreset); + return; + } + + if (rnnoiseActive) { + await microphoneTrack.stopProcessor(); + } + await microphoneTrack.setProcessor(new RNNoiseProcessor(rnnoisePreset)); + } else if (rnnoiseActive) { + await microphoneTrack.stopProcessor(); + } + } catch (e) { + this.logger.error("Failed to apply RNNoise microphone processor", e); + if (rnnoiseEnabled && rnnoiseNoiseSuppression.getValue()) { + this.logger.warn( + "Disabling RNNoise setting after processor setup failure", + ); + rnnoiseNoiseSuppression.setValue(false); + } + } + } } diff --git a/src/types/jitsi-rnnoise-wasm.d.ts b/src/types/jitsi-rnnoise-wasm.d.ts new file mode 100644 index 0000000000..c7ef1b9d94 --- /dev/null +++ b/src/types/jitsi-rnnoise-wasm.d.ts @@ -0,0 +1,12 @@ +/* +Copyright 2026 Element Creations Ltd. + +SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial +Please see LICENSE in the repository root for full details. 
+*/ + +declare module "@jitsi/rnnoise-wasm/dist/rnnoise-sync.js" { + const createRNNWasmModuleSync: () => unknown; + + export default createRNNWasmModuleSync; +} diff --git a/yarn.lock b/yarn.lock index 4675d0e1ab..dccfb8ebf9 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3195,6 +3195,13 @@ __metadata: languageName: node linkType: hard +"@jitsi/rnnoise-wasm@npm:^0.2.1": + version: 0.2.1 + resolution: "@jitsi/rnnoise-wasm@npm:0.2.1" + checksum: 10c0/6e5b475b364660eb24c0fa9843a63040253c2ce4034de9313e811448f5c6dad2205a0f22d3a9ef15cbef3c808941b0681d238d53d5a853e194a4c88cdd5569b1 + languageName: node + linkType: hard + "@jridgewell/gen-mapping@npm:^0.3.12": version: 0.3.12 resolution: "@jridgewell/gen-mapping@npm:0.3.12" @@ -8377,6 +8384,7 @@ __metadata: "@fontsource/inter": "npm:^5.1.0" "@formatjs/intl-durationformat": "npm:^0.10.0" "@formatjs/intl-segmenter": "npm:^11.7.3" + "@jitsi/rnnoise-wasm": "npm:^0.2.1" "@livekit/components-core": "npm:^0.12.0" "@livekit/components-react": "npm:^2.0.0" "@livekit/protocol": "npm:^1.42.2"