From f0697d6dde2067bbb3b84ee3f8c061b712064949 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 24 Feb 2026 02:36:58 +0900 Subject: [PATCH 01/10] add changeset --- .changes/pre-connect-audio-web | 1 + 1 file changed, 1 insertion(+) create mode 100644 .changes/pre-connect-audio-web diff --git a/.changes/pre-connect-audio-web b/.changes/pre-connect-audio-web new file mode 100644 index 000000000..ecf31ff9c --- /dev/null +++ b/.changes/pre-connect-audio-web @@ -0,0 +1 @@ +minor type="added" "Add web support for pre-connect audio buffer via AudioWorklet" From 9267e888e9d2e61984b7694681aeea58fa776699 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 24 Feb 2026 03:11:13 +0900 Subject: [PATCH 02/10] change --- .changes/pre-connect-audio-web | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.changes/pre-connect-audio-web b/.changes/pre-connect-audio-web index ecf31ff9c..d21408104 100644 --- a/.changes/pre-connect-audio-web +++ b/.changes/pre-connect-audio-web @@ -1 +1 @@ -minor type="added" "Add web support for pre-connect audio buffer via AudioWorklet" +patch type="added" "Add web support for pre-connect audio buffer" From 51fde82792a373425db7da997725ef8b1e57b39e Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 24 Feb 2026 03:02:41 +0900 Subject: [PATCH 03/10] impl1 --- lib/livekit_client.dart | 1 + lib/src/preconnect/audio_frame_capture.dart | 3 + lib/src/track/local/local.dart | 71 +++++++++++++++++++++ 3 files changed, 75 insertions(+) diff --git a/lib/livekit_client.dart b/lib/livekit_client.dart index 27276733b..86b6cb607 100644 --- a/lib/livekit_client.dart +++ b/lib/livekit_client.dart @@ -41,6 +41,7 @@ export 'src/agent/room_agent.dart'; export 'src/participant/local.dart'; export 'src/participant/participant.dart'; export 'src/participant/remote.dart' hide ParticipantCreationResult; +export 
'src/preconnect/audio_frame_capture.dart' show AudioFrame, AudioFrameCallback; export 'src/preconnect/pre_connect_audio_buffer.dart'; export 'src/publication/local.dart'; export 'src/publication/remote.dart'; diff --git a/lib/src/preconnect/audio_frame_capture.dart b/lib/src/preconnect/audio_frame_capture.dart index ba3f983a3..c36ef1444 100644 --- a/lib/src/preconnect/audio_frame_capture.dart +++ b/lib/src/preconnect/audio_frame_capture.dart @@ -56,5 +56,8 @@ abstract class AudioFrameCapture { Future stop(); } +/// Callback for receiving audio frames. +typedef AudioFrameCallback = void Function(AudioFrame frame); + /// Factory that returns the platform-appropriate implementation. AudioFrameCapture createAudioFrameCapture() => createAudioFrameCaptureImpl(); diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart index 2ce82883b..079968c26 100644 --- a/lib/src/track/local/local.dart +++ b/lib/src/track/local/local.dart @@ -33,6 +33,7 @@ import '../processor.dart'; import '../processor_native.dart' if (dart.library.js_interop) '../processor_web.dart'; import '../remote/audio.dart'; import '../remote/video.dart'; +import '../../preconnect/audio_frame_capture.dart'; import '../track.dart'; import 'audio.dart'; import 'video.dart'; @@ -60,6 +61,74 @@ mixin VideoTrack on Track { /// Used to group [LocalAudioTrack] and [RemoteAudioTrack]. mixin AudioTrack on Track { + AudioFrameCapture? _audioCapture; + final List _audioRenderers = []; + StreamSubscription? _audioFrameSubscription; + + /// Register a callback to receive raw PCM audio frames from this track. + /// + /// Returns a function that, when called, removes this renderer. + /// When the last renderer is removed, audio capture stops automatically. 
+ CancelListenFunc addAudioRenderer({ + required AudioFrameCallback onFrame, + int sampleRate = 24000, + int channels = 1, + String commonFormat = 'int16', + }) { + _audioRenderers.add(onFrame); + + if (_audioRenderers.length == 1) { + _startAudioCapture( + sampleRate: sampleRate, + channels: channels, + commonFormat: commonFormat, + ); + } + + return () async { + _audioRenderers.remove(onFrame); + if (_audioRenderers.isEmpty) { + await _stopAudioCapture(); + } + }; + } + + Future _startAudioCapture({ + required int sampleRate, + required int channels, + required String commonFormat, + }) async { + final capture = createAudioFrameCapture(); + _audioCapture = capture; + final rendererId = Track.uuid.v4(); + + final result = await capture.start( + track: mediaStreamTrack, + rendererId: rendererId, + sampleRate: sampleRate, + channels: channels, + commonFormat: commonFormat, + ); + + if (!result) { + logger.warning('Failed to start audio capture for renderer'); + return; + } + + _audioFrameSubscription = capture.frameStream.listen((frame) { + for (final renderer in List.of(_audioRenderers)) { + renderer(frame); + } + }); + } + + Future _stopAudioCapture() async { + await _audioFrameSubscription?.cancel(); + _audioFrameSubscription = null; + await _audioCapture?.stop(); + _audioCapture = null; + } + @override Future onStarted() async { logger.fine('AudioTrack.onStarted()'); @@ -68,6 +137,8 @@ mixin AudioTrack on Track { @override Future onStopped() async { logger.fine('AudioTrack.onStopped()'); + await _stopAudioCapture(); + _audioRenderers.clear(); } } From 349b3ebe4647d89672c107375d73a3dfc963af1d Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 24 Feb 2026 03:05:22 +0900 Subject: [PATCH 04/10] refactor --- lib/livekit_client.dart | 2 +- lib/src/{preconnect => audio}/audio_frame_capture.dart | 0 lib/src/{preconnect => audio}/audio_frame_capture_native.dart | 0 lib/src/{preconnect => 
audio}/audio_frame_capture_web.dart | 2 +- lib/src/{support => audio}/audio_pcm_utils.dart | 0 lib/src/preconnect/pre_connect_audio_buffer.dart | 2 +- lib/src/track/local/local.dart | 2 +- test/preconnect/audio_frame_capture_test.dart | 4 ++-- 8 files changed, 6 insertions(+), 6 deletions(-) rename lib/src/{preconnect => audio}/audio_frame_capture.dart (100%) rename lib/src/{preconnect => audio}/audio_frame_capture_native.dart (100%) rename lib/src/{preconnect => audio}/audio_frame_capture_web.dart (99%) rename lib/src/{support => audio}/audio_pcm_utils.dart (100%) diff --git a/lib/livekit_client.dart b/lib/livekit_client.dart index 86b6cb607..a16aac6dd 100644 --- a/lib/livekit_client.dart +++ b/lib/livekit_client.dart @@ -41,7 +41,7 @@ export 'src/agent/room_agent.dart'; export 'src/participant/local.dart'; export 'src/participant/participant.dart'; export 'src/participant/remote.dart' hide ParticipantCreationResult; -export 'src/preconnect/audio_frame_capture.dart' show AudioFrame, AudioFrameCallback; +export 'src/audio/audio_frame_capture.dart' show AudioFrame, AudioFrameCallback; export 'src/preconnect/pre_connect_audio_buffer.dart'; export 'src/publication/local.dart'; export 'src/publication/remote.dart'; diff --git a/lib/src/preconnect/audio_frame_capture.dart b/lib/src/audio/audio_frame_capture.dart similarity index 100% rename from lib/src/preconnect/audio_frame_capture.dart rename to lib/src/audio/audio_frame_capture.dart diff --git a/lib/src/preconnect/audio_frame_capture_native.dart b/lib/src/audio/audio_frame_capture_native.dart similarity index 100% rename from lib/src/preconnect/audio_frame_capture_native.dart rename to lib/src/audio/audio_frame_capture_native.dart diff --git a/lib/src/preconnect/audio_frame_capture_web.dart b/lib/src/audio/audio_frame_capture_web.dart similarity index 99% rename from lib/src/preconnect/audio_frame_capture_web.dart rename to lib/src/audio/audio_frame_capture_web.dart index a44e9b4eb..7bc21ee56 100644 --- 
a/lib/src/preconnect/audio_frame_capture_web.dart +++ b/lib/src/audio/audio_frame_capture_web.dart @@ -22,8 +22,8 @@ import 'package:flutter_webrtc/flutter_webrtc.dart' show MediaStreamTrack; import 'package:web/web.dart' as web; import '../logger.dart'; -import '../support/audio_pcm_utils.dart'; import 'audio_frame_capture.dart'; +import 'audio_pcm_utils.dart'; /// JavaScript source for the AudioWorkletProcessor. /// diff --git a/lib/src/support/audio_pcm_utils.dart b/lib/src/audio/audio_pcm_utils.dart similarity index 100% rename from lib/src/support/audio_pcm_utils.dart rename to lib/src/audio/audio_pcm_utils.dart diff --git a/lib/src/preconnect/pre_connect_audio_buffer.dart b/lib/src/preconnect/pre_connect_audio_buffer.dart index 0dedf25ea..ae222dc43 100644 --- a/lib/src/preconnect/pre_connect_audio_buffer.dart +++ b/lib/src/preconnect/pre_connect_audio_buffer.dart @@ -19,6 +19,7 @@ import 'package:flutter/foundation.dart' show kIsWeb; import 'package:flutter_webrtc/flutter_webrtc.dart' as webrtc; import 'package:uuid/uuid.dart'; +import '../audio/audio_frame_capture.dart'; import '../core/room.dart'; import '../events.dart'; import '../logger.dart'; @@ -29,7 +30,6 @@ import '../track/local/audio.dart'; import '../types/data_stream.dart'; import '../types/other.dart'; import '../types/participant_state.dart'; -import 'audio_frame_capture.dart'; typedef PreConnectOnError = void Function(Object error); diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart index 079968c26..baa39ca93 100644 --- a/lib/src/track/local/local.dart +++ b/lib/src/track/local/local.dart @@ -20,6 +20,7 @@ import 'package:flutter/material.dart'; import 'package:flutter_webrtc/flutter_webrtc.dart' as rtc; import 'package:meta/meta.dart'; +import '../../audio/audio_frame_capture.dart'; import '../../events.dart'; import '../../exceptions.dart'; import '../../extensions.dart'; @@ -33,7 +34,6 @@ import '../processor.dart'; import '../processor_native.dart' if 
(dart.library.js_interop) '../processor_web.dart'; import '../remote/audio.dart'; import '../remote/video.dart'; -import '../../preconnect/audio_frame_capture.dart'; import '../track.dart'; import 'audio.dart'; import 'video.dart'; diff --git a/test/preconnect/audio_frame_capture_test.dart b/test/preconnect/audio_frame_capture_test.dart index d4a536825..1183b10f6 100644 --- a/test/preconnect/audio_frame_capture_test.dart +++ b/test/preconnect/audio_frame_capture_test.dart @@ -3,8 +3,8 @@ import 'dart:typed_data'; import 'package:flutter_test/flutter_test.dart'; -import 'package:livekit_client/src/preconnect/audio_frame_capture.dart'; -import 'package:livekit_client/src/support/audio_pcm_utils.dart'; +import 'package:livekit_client/src/audio/audio_frame_capture.dart'; +import 'package:livekit_client/src/audio/audio_pcm_utils.dart'; import 'package:livekit_client/src/support/byte_ring_buffer.dart'; /// A mock AudioFrameCapture that emits frames from a StreamController. From f992e7bd4bcc171983354ba4e54e15282f868baa Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 3 Mar 2026 00:12:20 +0900 Subject: [PATCH 05/10] fix: wrap _startAudioCapture in unawaited to fix discarded_futures lint --- lib/src/track/local/local.dart | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart index baa39ca93..a84e41da7 100644 --- a/lib/src/track/local/local.dart +++ b/lib/src/track/local/local.dart @@ -78,11 +78,11 @@ mixin AudioTrack on Track { _audioRenderers.add(onFrame); if (_audioRenderers.length == 1) { - _startAudioCapture( + unawaited(_startAudioCapture( sampleRate: sampleRate, channels: channels, commonFormat: commonFormat, - ); + )); } return () async { From 4ce076574c9e04a8c5f099aa75339e3827a5be78 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:15:42 +0900 Subject: [PATCH 06/10] 
changes --- .changes/audio-renderer-api | 1 + .changes/pre-connect-audio-web | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 .changes/audio-renderer-api delete mode 100644 .changes/pre-connect-audio-web diff --git a/.changes/audio-renderer-api b/.changes/audio-renderer-api new file mode 100644 index 000000000..ab40d5c28 --- /dev/null +++ b/.changes/audio-renderer-api @@ -0,0 +1 @@ +minor type="added" "Add audio renderer API for receiving raw audio frames" diff --git a/.changes/pre-connect-audio-web b/.changes/pre-connect-audio-web deleted file mode 100644 index d21408104..000000000 --- a/.changes/pre-connect-audio-web +++ /dev/null @@ -1 +0,0 @@ -patch type="added" "Add web support for pre-connect audio buffer" From db68a20271244882b80dc70774d9215f01ccfbc1 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:21:23 +0900 Subject: [PATCH 07/10] options & format p1 --- lib/livekit_client.dart | 2 +- lib/src/audio/audio_frame_capture.dart | 39 ++++++++- lib/src/audio/audio_frame_capture_native.dart | 7 +- lib/src/audio/audio_frame_capture_web.dart | 10 +-- .../preconnect/pre_connect_audio_buffer.dart | 2 +- lib/src/track/local/local.dart | 82 ++++++++++--------- test/preconnect/audio_frame_capture_test.dart | 34 ++++---- 7 files changed, 107 insertions(+), 69 deletions(-) diff --git a/lib/livekit_client.dart b/lib/livekit_client.dart index a16aac6dd..cb0408d67 100644 --- a/lib/livekit_client.dart +++ b/lib/livekit_client.dart @@ -41,7 +41,7 @@ export 'src/agent/room_agent.dart'; export 'src/participant/local.dart'; export 'src/participant/participant.dart'; export 'src/participant/remote.dart' hide ParticipantCreationResult; -export 'src/audio/audio_frame_capture.dart' show AudioFrame, AudioFrameCallback; +export 'src/audio/audio_frame_capture.dart' show AudioFormat, AudioFrame, AudioFrameCallback, AudioRendererOptions; export 'src/preconnect/pre_connect_audio_buffer.dart'; export 
'src/publication/local.dart'; export 'src/publication/remote.dart'; diff --git a/lib/src/audio/audio_frame_capture.dart b/lib/src/audio/audio_frame_capture.dart index c36ef1444..b92f776ac 100644 --- a/lib/src/audio/audio_frame_capture.dart +++ b/lib/src/audio/audio_frame_capture.dart @@ -18,18 +18,27 @@ import 'package:flutter_webrtc/flutter_webrtc.dart' show MediaStreamTrack; import 'audio_frame_capture_native.dart' if (dart.library.js_interop) 'audio_frame_capture_web.dart'; +/// PCM sample format for audio frame capture. +enum AudioFormat { + Int16('int16'), + Float32('float32'); + + final String value; + const AudioFormat(this.value); +} + /// A single frame of raw PCM audio data. class AudioFrame { final int sampleRate; final int channels; final Uint8List data; - final String commonFormat; + final AudioFormat format; const AudioFrame({ required this.sampleRate, required this.channels, required this.data, - required this.commonFormat, + required this.format, }); } @@ -49,7 +58,7 @@ abstract class AudioFrameCapture { required String rendererId, required int sampleRate, required int channels, - required String commonFormat, + required AudioFormat format, }); /// Stop capturing and release resources. @@ -59,5 +68,29 @@ abstract class AudioFrameCapture { /// Callback for receiving audio frames. typedef AudioFrameCallback = void Function(AudioFrame frame); +/// Options for configuring audio frame capture format. 
+class AudioRendererOptions { + final int sampleRate; + final int channels; + final AudioFormat format; + + const AudioRendererOptions({ + this.sampleRate = 24000, + this.channels = 1, + this.format = AudioFormat.Int16, + }); + + @override + bool operator ==(Object other) => + identical(this, other) || + other is AudioRendererOptions && + sampleRate == other.sampleRate && + channels == other.channels && + format == other.format; + + @override + int get hashCode => Object.hash(sampleRate, channels, format); +} + /// Factory that returns the platform-appropriate implementation. AudioFrameCapture createAudioFrameCapture() => createAudioFrameCaptureImpl(); diff --git a/lib/src/audio/audio_frame_capture_native.dart b/lib/src/audio/audio_frame_capture_native.dart index bea7587b2..a7b06a1d9 100644 --- a/lib/src/audio/audio_frame_capture_native.dart +++ b/lib/src/audio/audio_frame_capture_native.dart @@ -39,13 +39,13 @@ class AudioFrameCaptureNative implements AudioFrameCapture { required String rendererId, required int sampleRate, required int channels, - required String commonFormat, + required AudioFormat format, }) async { final result = await Native.startAudioRenderer( trackId: track.id!, rendererId: rendererId, format: { - 'commonFormat': commonFormat, + 'commonFormat': format.value, 'sampleRate': sampleRate, 'channels': channels, }, @@ -58,11 +58,12 @@ class AudioFrameCaptureNative implements AudioFrameCapture { _eventChannel = EventChannel('io.livekit.audio.renderer/channel-$rendererId'); _streamSubscription = _eventChannel?.receiveBroadcastStream().listen((event) { try { + final rawFormat = event['commonFormat'] as String?; _controller.add(AudioFrame( sampleRate: event['sampleRate'] as int, channels: event['channels'] as int, data: event['data'] as Uint8List, - commonFormat: (event['commonFormat'] as String?) ?? commonFormat, + format: rawFormat == AudioFormat.Float32.value ? 
AudioFormat.Float32 : AudioFormat.Int16, )); } catch (e) { logger.warning('[AudioFrameCapture] Error parsing native event: $e'); diff --git a/lib/src/audio/audio_frame_capture_web.dart b/lib/src/audio/audio_frame_capture_web.dart index 7bc21ee56..2e7ad4581 100644 --- a/lib/src/audio/audio_frame_capture_web.dart +++ b/lib/src/audio/audio_frame_capture_web.dart @@ -63,7 +63,7 @@ class AudioFrameCaptureWeb implements AudioFrameCapture { web.AudioWorkletNode? _workletNode; web.AudioNode? _sourceNode; StreamController? _controller; - String _targetFormat = 'int16'; + AudioFormat _targetFormat = AudioFormat.Int16; int _targetChannels = 1; @override @@ -75,9 +75,9 @@ class AudioFrameCaptureWeb implements AudioFrameCapture { required String rendererId, required int sampleRate, required int channels, - required String commonFormat, + required AudioFormat format, }) async { - _targetFormat = commonFormat; + _targetFormat = format; _targetChannels = channels; _controller ??= StreamController.broadcast(); @@ -144,7 +144,7 @@ class AudioFrameCaptureWeb implements AudioFrameCapture { final srcFloat32 = samplesBuffer.asFloat32List(); final Uint8List bytes; - if (_targetFormat == 'float32') { + if (_targetFormat == AudioFormat.Float32) { bytes = float32ToFloat32Bytes(srcFloat32, channels, outChannels, frames); } else { bytes = float32ToInt16Bytes(srcFloat32, channels, outChannels, frames); @@ -154,7 +154,7 @@ class AudioFrameCaptureWeb implements AudioFrameCapture { sampleRate: actualSampleRate, channels: outChannels, data: bytes, - commonFormat: _targetFormat, + format: _targetFormat, )); } catch (e) { logger.warning('[AudioFrameCapture] Error processing worklet frame: $e'); diff --git a/lib/src/preconnect/pre_connect_audio_buffer.dart b/lib/src/preconnect/pre_connect_audio_buffer.dart index ae222dc43..47b3f22e4 100644 --- a/lib/src/preconnect/pre_connect_audio_buffer.dart +++ b/lib/src/preconnect/pre_connect_audio_buffer.dart @@ -132,7 +132,7 @@ class PreConnectAudioBuffer { 
rendererId: rendererId, sampleRate: _requestSampleRate, channels: 1, - commonFormat: 'int16', + format: AudioFormat.Int16, ); if (!result) { diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart index a84e41da7..db3b0550b 100644 --- a/lib/src/track/local/local.dart +++ b/lib/src/track/local/local.dart @@ -61,53 +61,49 @@ mixin VideoTrack on Track { /// Used to group [LocalAudioTrack] and [RemoteAudioTrack]. mixin AudioTrack on Track { - AudioFrameCapture? _audioCapture; - final List _audioRenderers = []; - StreamSubscription? _audioFrameSubscription; + final Map _captureGroups = {}; /// Register a callback to receive raw PCM audio frames from this track. /// + /// Multiple renderers with different [options] each get their own capture + /// pipeline. Renderers sharing the same options share a single capture. + /// /// Returns a function that, when called, removes this renderer. - /// When the last renderer is removed, audio capture stops automatically. + /// When the last renderer for a given options config is removed, that + /// capture stops automatically. 
CancelListenFunc addAudioRenderer({ required AudioFrameCallback onFrame, - int sampleRate = 24000, - int channels = 1, - String commonFormat = 'int16', + AudioRendererOptions options = const AudioRendererOptions(), }) { - _audioRenderers.add(onFrame); - - if (_audioRenderers.length == 1) { - unawaited(_startAudioCapture( - sampleRate: sampleRate, - channels: channels, - commonFormat: commonFormat, - )); - } + final group = _captureGroups.putIfAbsent(options, () { + final g = _AudioCaptureGroup(); + unawaited(_startCaptureGroup(g, options)); + return g; + }); + group.renderers.add(onFrame); return () async { - _audioRenderers.remove(onFrame); - if (_audioRenderers.isEmpty) { - await _stopAudioCapture(); + group.renderers.remove(onFrame); + if (group.renderers.isEmpty) { + _captureGroups.remove(options); + await _stopCaptureGroup(group); } }; } - Future _startAudioCapture({ - required int sampleRate, - required int channels, - required String commonFormat, - }) async { + Future _startCaptureGroup( + _AudioCaptureGroup group, + AudioRendererOptions options, + ) async { final capture = createAudioFrameCapture(); - _audioCapture = capture; - final rendererId = Track.uuid.v4(); + group.capture = capture; final result = await capture.start( track: mediaStreamTrack, - rendererId: rendererId, - sampleRate: sampleRate, - channels: channels, - commonFormat: commonFormat, + rendererId: Track.uuid.v4(), + sampleRate: options.sampleRate, + channels: options.channels, + format: options.format, ); if (!result) { @@ -115,18 +111,18 @@ mixin AudioTrack on Track { return; } - _audioFrameSubscription = capture.frameStream.listen((frame) { - for (final renderer in List.of(_audioRenderers)) { + group.subscription = capture.frameStream.listen((frame) { + for (final renderer in List.of(group.renderers)) { renderer(frame); } }); } - Future _stopAudioCapture() async { - await _audioFrameSubscription?.cancel(); - _audioFrameSubscription = null; - await _audioCapture?.stop(); - _audioCapture 
= null; + Future _stopCaptureGroup(_AudioCaptureGroup group) async { + await group.subscription?.cancel(); + group.subscription = null; + await group.capture?.stop(); + group.capture = null; } @override @@ -137,11 +133,19 @@ mixin AudioTrack on Track { @override Future onStopped() async { logger.fine('AudioTrack.onStopped()'); - await _stopAudioCapture(); - _audioRenderers.clear(); + for (final group in _captureGroups.values) { + await _stopCaptureGroup(group); + } + _captureGroups.clear(); } } +class _AudioCaptureGroup { + AudioFrameCapture? capture; + StreamSubscription? subscription; + final List renderers = []; +} + /// Base class for [LocalAudioTrack] and [LocalVideoTrack]. abstract class LocalTrack extends Track { /// Options used for this track diff --git a/test/preconnect/audio_frame_capture_test.dart b/test/preconnect/audio_frame_capture_test.dart index 1183b10f6..384975432 100644 --- a/test/preconnect/audio_frame_capture_test.dart +++ b/test/preconnect/audio_frame_capture_test.dart @@ -22,7 +22,7 @@ class MockAudioFrameCapture implements AudioFrameCapture { required String rendererId, required int sampleRate, required int channels, - required String commonFormat, + required AudioFormat format, }) async { started = true; return true; @@ -75,12 +75,12 @@ void main() { sampleRate: 48000, channels: 1, data: int16Bytes([100, -200, 300]), - commonFormat: 'int16', + format: AudioFormat.Int16, ); expect(frame.sampleRate, 48000); expect(frame.channels, 1); - expect(frame.commonFormat, 'int16'); + expect(frame.format, AudioFormat.Int16); expect(frame.data.length, 6); // 3 samples * 2 bytes }); }); @@ -93,7 +93,7 @@ void main() { rendererId: 'test-id', sampleRate: 24000, channels: 1, - commonFormat: 'int16', + format: AudioFormat.Int16, ); expect(result, true); @@ -107,7 +107,7 @@ void main() { rendererId: 'test-id', sampleRate: 24000, channels: 1, - commonFormat: 'int16', + format: AudioFormat.Int16, ); final frames = []; @@ -117,14 +117,14 @@ void main() { 
sampleRate: 24000, channels: 1, data: int16Bytes([1000, -1000]), - commonFormat: 'int16', + format: AudioFormat.Int16, )); capture.emitFrame(AudioFrame( sampleRate: 24000, channels: 1, data: int16Bytes([2000, -2000]), - commonFormat: 'int16', + format: AudioFormat.Int16, )); // Let microtasks run. @@ -145,7 +145,7 @@ void main() { rendererId: 'test-id', sampleRate: 24000, channels: 1, - commonFormat: 'int16', + format: AudioFormat.Int16, ); await capture.stop(); @@ -253,7 +253,7 @@ void main() { rendererId: 'test-id', sampleRate: 24000, channels: 1, - commonFormat: 'int16', + format: AudioFormat.Int16, ); int? capturedSampleRate; @@ -272,7 +272,7 @@ void main() { sampleRate: 48000, channels: 1, data: int16Bytes(samples), - commonFormat: 'int16', + format: AudioFormat.Int16, )); } @@ -302,7 +302,7 @@ void main() { rendererId: 'test-id', sampleRate: 24000, channels: 1, - commonFormat: 'int16', + format: AudioFormat.Int16, ); bool overflowed = false; @@ -317,13 +317,13 @@ void main() { sampleRate: 24000, channels: 1, data: Uint8List(60), - commonFormat: 'int16', + format: AudioFormat.Int16, )); capture.emitFrame(AudioFrame( sampleRate: 24000, channels: 1, data: Uint8List(60), - commonFormat: 'int16', + format: AudioFormat.Int16, )); await Future.delayed(Duration.zero); @@ -344,7 +344,7 @@ void main() { rendererId: 'test-id', sampleRate: 48000, channels: 1, - commonFormat: 'int16', + format: AudioFormat.Int16, ); // Simulate what the web implementation does: receive float32 from @@ -361,7 +361,7 @@ void main() { sampleRate: 48000, channels: 1, data: samples.buffer.asUint8List(), - commonFormat: 'float32', + format: AudioFormat.Float32, )); await Future.delayed(Duration.zero); @@ -392,7 +392,7 @@ void main() { rendererId: 'test-id', sampleRate: 48000, channels: 1, - commonFormat: 'int16', + format: AudioFormat.Int16, ); final sub = capture.frameStream.listen((frame) { @@ -408,7 +408,7 @@ void main() { sampleRate: 48000, channels: 2, data: stereo.buffer.asUint8List(), - 
commonFormat: 'float32', + format: AudioFormat.Float32, )); await Future.delayed(Duration.zero); From b66e4c2505bc8d23b8a14254409b137773b017d2 Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:32:43 +0900 Subject: [PATCH 08/10] patch race --- lib/src/track/local/local.dart | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart index db3b0550b..afc464990 100644 --- a/lib/src/track/local/local.dart +++ b/lib/src/track/local/local.dart @@ -77,7 +77,7 @@ mixin AudioTrack on Track { }) { final group = _captureGroups.putIfAbsent(options, () { final g = _AudioCaptureGroup(); - unawaited(_startCaptureGroup(g, options)); + g.startFuture = _startCaptureGroup(g, options); return g; }); group.renderers.add(onFrame); @@ -119,6 +119,7 @@ mixin AudioTrack on Track { } Future _stopCaptureGroup(_AudioCaptureGroup group) async { + await group.startFuture; await group.subscription?.cancel(); group.subscription = null; await group.capture?.stop(); @@ -143,6 +144,7 @@ mixin AudioTrack on Track { class _AudioCaptureGroup { AudioFrameCapture? capture; StreamSubscription? subscription; + Future? 
startFuture; final List renderers = []; } From ddf31984fc81eef6574ea2ad473b82229ce18efc Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Tue, 3 Mar 2026 13:37:13 +0900 Subject: [PATCH 09/10] ref --- lib/src/track/local/local.dart | 85 ++++++++++++++++++---------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart index afc464990..9e42972d8 100644 --- a/lib/src/track/local/local.dart +++ b/lib/src/track/local/local.dart @@ -75,31 +75,55 @@ mixin AudioTrack on Track { required AudioFrameCallback onFrame, AudioRendererOptions options = const AudioRendererOptions(), }) { - final group = _captureGroups.putIfAbsent(options, () { - final g = _AudioCaptureGroup(); - g.startFuture = _startCaptureGroup(g, options); - return g; - }); + final group = _captureGroups.putIfAbsent( + options, + () => _AudioCaptureGroup(track: mediaStreamTrack, options: options), + ); group.renderers.add(onFrame); return () async { group.renderers.remove(onFrame); if (group.renderers.isEmpty) { _captureGroups.remove(options); - await _stopCaptureGroup(group); + await group.stop(); } }; } - Future _startCaptureGroup( - _AudioCaptureGroup group, - AudioRendererOptions options, - ) async { + @override + Future onStarted() async { + logger.fine('AudioTrack.onStarted()'); + } + + @override + Future onStopped() async { + logger.fine('AudioTrack.onStopped()'); + for (final group in _captureGroups.values) { + await group.stop(); + } + _captureGroups.clear(); + } +} + +class _AudioCaptureGroup { + final List renderers = []; + late final Future _startFuture; + AudioFrameCapture? _capture; + StreamSubscription? 
_subscription; + + _AudioCaptureGroup({ + required rtc.MediaStreamTrack track, + required AudioRendererOptions options, + }) { + _startFuture = _start(track, options); + } + + Future _start(rtc.MediaStreamTrack track, AudioRendererOptions options) async { final capture = createAudioFrameCapture(); - group.capture = capture; + _capture = capture; final result = await capture.start( - track: mediaStreamTrack, + track: track, rendererId: Track.uuid.v4(), sampleRate: options.sampleRate, channels: options.channels, @@ -111,43 +135,22 @@ mixin AudioTrack on Track { return; } - group.subscription = capture.frameStream.listen((frame) { - for (final renderer in List.of(group.renderers)) { + _subscription = capture.frameStream.listen((frame) { + for (final renderer in List.of(renderers)) { renderer(frame); } }); } - Future _stopCaptureGroup(_AudioCaptureGroup group) async { - await group.startFuture; - await group.subscription?.cancel(); - group.subscription = null; - await group.capture?.stop(); - group.capture = null; - } - - @override - Future onStarted() async { - logger.fine('AudioTrack.onStarted()'); - } - - @override - Future onStopped() async { - logger.fine('AudioTrack.onStopped()'); - for (final group in _captureGroups.values) { - await _stopCaptureGroup(group); - } - _captureGroups.clear(); + Future stop() async { + await _startFuture; + await _subscription?.cancel(); + _subscription = null; + await _capture?.stop(); + _capture = null; } } -class _AudioCaptureGroup { - AudioFrameCapture? capture; - StreamSubscription? subscription; - Future? startFuture; - final List renderers = []; -} - /// Base class for [LocalAudioTrack] and [LocalVideoTrack]. 
abstract class LocalTrack extends Track { /// Options used for this track From 6218bb2c85ea34b61b347d8d49033325c194590f Mon Sep 17 00:00:00 2001 From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com> Date: Wed, 4 Mar 2026 11:39:00 +0900 Subject: [PATCH 10/10] android resampler --- .../kotlin/io/livekit/plugin/AudioRenderer.kt | 151 ++++++-------- .../io/livekit/plugin/AudioResampler.kt | 142 +++++++++++++ .../io/livekit/plugin/AudioResamplerTest.kt | 193 ++++++++++++++++++ 3 files changed, 394 insertions(+), 92 deletions(-) create mode 100644 android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt create mode 100644 android/src/test/kotlin/io/livekit/plugin/AudioResamplerTest.kt diff --git a/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt b/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt index 1138723bf..43b5646ba 100644 --- a/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt +++ b/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt @@ -110,12 +110,9 @@ class AudioRenderer( } /** - * Converts audio data to raw interleaved bytes. + * Converts audio data to raw interleaved bytes with resampling. * - * If source and target channel counts match, data is copied directly. - * If target requests fewer channels, the first channels are kept and interleaved. - * - * Sends raw byte arrays instead of boxed sample lists. 
+ * Pipeline: read int16 → resample → channel reduce → format convert (int16/float32) */ private fun convertAudioData( audioData: ByteBuffer, @@ -138,16 +135,7 @@ class AudioRenderer( return null } - val bytesPerSample = 2 // 16-bit - val bytesPerFrame = numberOfChannels * bytesPerSample - if (bytesPerFrame <= 0) { - logDroppedFrame("Invalid bytesPerFrame: $bytesPerFrame") - return null - } - - val requestedChannels = targetFormat.numberOfChannels.coerceAtLeast(1) - val outChannels = requestedChannels.coerceAtMost(numberOfChannels) - + val bytesPerFrame = numberOfChannels * 2 val buffer = audioData.duplicate() buffer.order(ByteOrder.LITTLE_ENDIAN) buffer.rewind() @@ -159,7 +147,7 @@ class AudioRenderer( } val expectedBytes = numberOfFrames.toLong() * bytesPerFrame.toLong() - val frameLength = if (expectedBytes <= availableBytes.toLong()) { + val srcFrames = if (expectedBytes <= availableBytes.toLong()) { numberOfFrames } else { val availableFrames = availableBytes / bytesPerFrame @@ -173,24 +161,71 @@ class AudioRenderer( availableFrames } + // Step 1: Read source int16 samples into ShortArray + val src = ShortArray(srcFrames * numberOfChannels) + for (i in src.indices) { + src[i] = buffer.short + } + + // Step 2: Resample to target sample rate + val resampleResult = AudioResampler.resample( + src, srcFrames, sampleRate, targetFormat.sampleRate, numberOfChannels + ) + val resampled = resampleResult.samples + val outFrames = resampleResult.frameCount + + if (outFrames <= 0) { + logDroppedFrame("Resampled frame count is 0") + return null + } + + // Step 3: Channel reduction + format conversion + val requestedChannels = targetFormat.numberOfChannels.coerceAtLeast(1) + val outChannels = requestedChannels.coerceAtMost(numberOfChannels) + val result = mutableMapOf( - "sampleRate" to sampleRate, + "sampleRate" to targetFormat.sampleRate, "channels" to outChannels, - "frameLength" to frameLength, + "frameLength" to outFrames, ) when (targetFormat.commonFormat) { - "int16" 
-> { - result["commonFormat"] = "int16" - result["data"] = extractAsInt16Bytes(buffer, numberOfChannels, outChannels, frameLength) - } "float32" -> { result["commonFormat"] = "float32" - result["data"] = extractAsFloat32Bytes(buffer, numberOfChannels, outChannels, frameLength) + val out = ByteArray(outFrames * outChannels * 4) + val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN) + for (f in 0 until outFrames) { + for (ch in 0 until outChannels) { + val sample = resampled[f * numberOfChannels + ch].toFloat() / Short.MAX_VALUE + outBuf.putFloat((f * outChannels + ch) * 4, sample) + } + } + result["data"] = out } else -> { result["commonFormat"] = "int16" - result["data"] = extractAsInt16Bytes(buffer, numberOfChannels, outChannels, frameLength) + if (outChannels == numberOfChannels) { + // Fast path: no channel reduction — bulk copy resampled data + val out = ByteArray(outFrames * outChannels * 2) + val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN) + for (i in 0 until outFrames * outChannels) { + outBuf.putShort(i * 2, resampled[i]) + } + result["data"] = out + } else { + // Channel reduction: keep first outChannels + val out = ByteArray(outFrames * outChannels * 2) + val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN) + for (f in 0 until outFrames) { + for (ch in 0 until outChannels) { + outBuf.putShort( + (f * outChannels + ch) * 2, + resampled[f * numberOfChannels + ch] + ) + } + } + result["data"] = out + } } } @@ -203,74 +238,6 @@ class AudioRenderer( Log.w(TAG, "Dropping audio frame #$droppedFrameCount for rendererId=$rendererId: $reason") } } - - /** - * Extracts int16 PCM bytes from an int16 source buffer. - * - * Fast path when channel counts match (direct copy). - * Otherwise keeps only the first [outChannels] channels, interleaved. 
- */ - private fun extractAsInt16Bytes( - buffer: ByteBuffer, - srcChannels: Int, - outChannels: Int, - numberOfFrames: Int - ): ByteArray { - // Fast path: matching channel count — bulk copy. - if (srcChannels == outChannels) { - val totalBytes = numberOfFrames * outChannels * 2 - val out = ByteArray(totalBytes) - buffer.get(out, 0, totalBytes.coerceAtMost(buffer.remaining())) - return out - } - - // Channel reduction: keep first outChannels. - val out = ByteArray(numberOfFrames * outChannels * 2) - val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN) - - for (frame in 0 until numberOfFrames) { - val srcOffset = frame * srcChannels * 2 - for (ch in 0 until outChannels) { - val byteIndex = srcOffset + ch * 2 - if (byteIndex + 1 < buffer.capacity()) { - buffer.position(byteIndex) - outBuf.putShort((frame * outChannels + ch) * 2, buffer.short) - } - } - } - - return out - } - - /** - * Converts int16 PCM source to float32 bytes. - * - * Each int16 sample is scaled to the [-1.0, 1.0] range. - * Only the first [outChannels] channels are kept. 
- */ - private fun extractAsFloat32Bytes( - buffer: ByteBuffer, - srcChannels: Int, - outChannels: Int, - numberOfFrames: Int - ): ByteArray { - val out = ByteArray(numberOfFrames * outChannels * 4) - val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN) - - for (frame in 0 until numberOfFrames) { - val srcOffset = frame * srcChannels * 2 - for (ch in 0 until outChannels) { - val byteIndex = srcOffset + ch * 2 - if (byteIndex + 1 < buffer.capacity()) { - buffer.position(byteIndex) - val sampleFloat = buffer.short.toFloat() / Short.MAX_VALUE - outBuf.putFloat((frame * outChannels + ch) * 4, sampleFloat) - } - } - } - - return out - } } /** diff --git a/android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt b/android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt new file mode 100644 index 000000000..f008bad77 --- /dev/null +++ b/android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt @@ -0,0 +1,142 @@ +/* + * Copyright 2024 LiveKit, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package io.livekit.plugin + +/** + * Pure audio resampler for interleaved int16 PCM data. + * + * - Same rate: passthrough (returns input array as-is) + * - Upsampling: linear interpolation between adjacent samples + * - Downsampling: box filter (averages source samples per output sample) to prevent aliasing + */ +object AudioResampler { + + /** + * Resample interleaved int16 PCM audio. 
+     *
+     * @param src Interleaved int16 samples (channels interleaved per frame)
+     * @param srcFrames Number of frames in [src] (total samples = srcFrames * channels)
+     * @param srcRate Source sample rate in Hz
+     * @param targetRate Target sample rate in Hz
+     * @param channels Number of interleaved channels
+     * @return Resampled samples and frame count; [src] is returned as-is when rates match, srcFrames <= 0 or channels <= 0.
+     */
+    fun resample(
+        src: ShortArray,
+        srcFrames: Int,
+        srcRate: Int,
+        targetRate: Int,
+        channels: Int
+    ): ResampleResult {
+        if (srcRate == targetRate || srcFrames <= 0 || channels <= 0) {
+            return ResampleResult(src, srcFrames)
+        }
+
+        val outFrames = ((srcFrames.toLong() * targetRate) / srcRate).toInt()
+        if (outFrames <= 0) {
+            return ResampleResult(ShortArray(0), 0)
+        }
+
+        val resampled = if (targetRate > srcRate) {
+            upsample(src, srcFrames, outFrames, channels)
+        } else {
+            downsample(src, srcFrames, outFrames, srcRate, targetRate, channels)
+        }
+
+        return ResampleResult(resampled, outFrames)
+    }
+
+    /**
+     * Linear interpolation upsampling: output frame f reads source position f * srcFrames / outFrames; the tail holds the last frame.
+     */
+    private fun upsample(
+        src: ShortArray,
+        srcFrames: Int,
+        outFrames: Int,
+        channels: Int
+    ): ShortArray {
+        val out = ShortArray(outFrames * channels)
+
+        // Edge case: single source frame — just repeat it
+        if (srcFrames <= 1) {
+            for (f in 0 until outFrames) {
+                for (ch in 0 until channels) {
+                    out[f * channels + ch] = src[ch]
+                }
+            }
+            return out
+        }
+
+        val ratio = srcFrames.toDouble() / outFrames.toDouble()
+
+        for (f in 0 until outFrames) {
+            val srcPos = f * ratio
+            val idx = srcPos.toInt().coerceAtMost(srcFrames - 2) // keep idx + 1 inside the source
+            val frac = (srcPos - idx).coerceIn(0.0, 1.0).toFloat() // idx is capped, so clamp: never extrapolate past the last frame
+
+            for (ch in 0 until channels) {
+                val s0 = src[idx * channels + ch]
+                val s1 = src[(idx + 1) * channels + ch]
+                out[f * channels + ch] = (s0 + frac * (s1 - s0)).toInt()
+                    .coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt())
+                    .toShort()
+            }
+        }
+
+        return out
+    }
+
+    /**
+     * Box filter downsampling. 
Averages all source samples that map to each + * output sample, acting as a low-pass filter to prevent aliasing. + */ + private fun downsample( + src: ShortArray, + srcFrames: Int, + outFrames: Int, + srcRate: Int, + targetRate: Int, + channels: Int + ): ShortArray { + val out = ShortArray(outFrames * channels) + val ratio = srcRate.toDouble() / targetRate.toDouble() + + for (f in 0 until outFrames) { + val srcStart = (f * ratio).toInt() + val srcEnd = ((f + 1) * ratio).toInt().coerceAtMost(srcFrames) + + for (ch in 0 until channels) { + var sum = 0L + for (i in srcStart until srcEnd) { + sum += src[i * channels + ch] + } + val count = srcEnd - srcStart + out[f * channels + ch] = if (count > 0) { + (sum / count).toInt() + .coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt()) + .toShort() + } else { + 0 + } + } + } + + return out + } + + data class ResampleResult(val samples: ShortArray, val frameCount: Int) +} diff --git a/android/src/test/kotlin/io/livekit/plugin/AudioResamplerTest.kt b/android/src/test/kotlin/io/livekit/plugin/AudioResamplerTest.kt new file mode 100644 index 000000000..df9c27acb --- /dev/null +++ b/android/src/test/kotlin/io/livekit/plugin/AudioResamplerTest.kt @@ -0,0 +1,193 @@ +package io.livekit.plugin + +import kotlin.math.PI +import kotlin.math.roundToInt +import kotlin.math.sin +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertTrue + +class AudioResamplerTest { + + // --- Passthrough --- + + @Test + fun `same rate returns identical samples`() { + val src = shortArrayOf(100, 200, 300, 400, 500) + val result = AudioResampler.resample(src, 5, 48000, 48000, 1) + assertEquals(5, result.frameCount) + assertTrue(src.contentEquals(result.samples)) + } + + // --- Upsample --- + + @Test + fun `upsample 8kHz to 48kHz produces 6x frames`() { + val srcFrames = 80 + val src = ShortArray(srcFrames) { (it * 100).toShort() } + val result = AudioResampler.resample(src, srcFrames, 8000, 48000, 1) + assertEquals(480, 
result.frameCount) + assertEquals(480, result.samples.size) + } + + @Test + fun `upsample preserves first and last sample`() { + val src = shortArrayOf(0, 1000, 2000, 3000) + val result = AudioResampler.resample(src, 4, 8000, 16000, 1) + assertEquals(0, result.samples[0].toInt()) + } + + @Test + fun `upsample with stereo preserves channel count`() { + // 4 frames, 2 channels: [L0, R0, L1, R1, L2, R2, L3, R3] + val src = shortArrayOf(100, -100, 200, -200, 300, -300, 400, -400) + val result = AudioResampler.resample(src, 4, 8000, 16000, 2) + assertEquals(8, result.frameCount) + assertEquals(16, result.samples.size) // 8 frames * 2 channels + } + + // --- Downsample --- + + @Test + fun `downsample 48kHz to 8kHz produces one-sixth frames`() { + val srcFrames = 480 + val src = ShortArray(srcFrames) { 1000 } + val result = AudioResampler.resample(src, srcFrames, 48000, 8000, 1) + assertEquals(80, result.frameCount) + assertEquals(80, result.samples.size) + for (s in result.samples) { + assertEquals(1000, s.toInt()) + } + } + + @Test + fun `downsample averages samples correctly`() { + // 6 samples at 48kHz → 1 sample at 8kHz, should average the 6 values + val src = shortArrayOf(100, 200, 300, 400, 500, 600) + val result = AudioResampler.resample(src, 6, 48000, 8000, 1) + assertEquals(1, result.frameCount) + // Average of 100..600 = 350 + assertEquals(350, result.samples[0].toInt()) + } + + // --- Sine wave preservation (signal below Nyquist survives) --- + + @Test + fun `440Hz sine survives downsample from 48kHz to 16kHz`() { + val srcRate = 48000 + val targetRate = 16000 + val freq = 440.0 + val durationSec = 0.1 + val srcFrames = (srcRate * durationSec).toInt() + + val src = ShortArray(srcFrames) { i -> + (sin(2.0 * PI * freq * i / srcRate) * 20000).roundToInt().toShort() + } + + val result = AudioResampler.resample(src, srcFrames, srcRate, targetRate, 1) + val expectedFrames = (srcFrames.toLong() * targetRate / srcRate).toInt() + assertEquals(expectedFrames, 
result.frameCount) + + // Verify the resampled signal still contains 440Hz by checking + // zero-crossings to estimate frequency + val zeroCrossings = countZeroCrossings(result.samples, result.frameCount) + // Each full cycle has 2 zero crossings + val estimatedFreq = (zeroCrossings / 2.0) / durationSec + // Allow 10% tolerance + assertTrue( + estimatedFreq > freq * 0.9 && estimatedFreq < freq * 1.1, + "Expected ~440Hz, estimated ${estimatedFreq}Hz (zeroCrossings=$zeroCrossings)" + ) + } + + // --- Aliasing rejection (signal above Nyquist is attenuated) --- + + @Test + fun `5kHz sine is attenuated when downsampled to 8kHz`() { + val srcRate = 48000 + val targetRate = 8000 // Nyquist = 4kHz + val freq = 5000.0 // Above Nyquist + val durationSec = 0.05 + val srcFrames = (srcRate * durationSec).toInt() + + val src = ShortArray(srcFrames) { i -> + (sin(2.0 * PI * freq * i / srcRate) * 20000).roundToInt().toShort() + } + + val result = AudioResampler.resample(src, srcFrames, srcRate, targetRate, 1) + + // With box filter, the 5kHz signal should be significantly attenuated. + val inputRms = rms(src, srcFrames) + val outputRms = rms(result.samples, result.frameCount) + + // Output RMS should be at most 50% of input RMS. 
+ assertTrue( + outputRms < inputRms * 0.5, + "5kHz signal should be attenuated: inputRms=$inputRms, outputRms=$outputRms, " + + "ratio=${outputRms / inputRms}" + ) + } + + @Test + fun `1kHz sine is preserved when downsampled to 8kHz`() { + val srcRate = 48000 + val targetRate = 8000 // Nyquist = 4kHz + val freq = 1000.0 // Well below Nyquist + val durationSec = 0.05 + val srcFrames = (srcRate * durationSec).toInt() + + val src = ShortArray(srcFrames) { i -> + (sin(2.0 * PI * freq * i / srcRate) * 20000).roundToInt().toShort() + } + + val result = AudioResampler.resample(src, srcFrames, srcRate, targetRate, 1) + + // 1kHz is well below Nyquist, RMS should remain roughly similar (within 30%) + val inputRms = rms(src, srcFrames) + val outputRms = rms(result.samples, result.frameCount) + + assertTrue( + outputRms > inputRms * 0.7, + "1kHz signal should be preserved: inputRms=$inputRms, outputRms=$outputRms, " + + "ratio=${outputRms / inputRms}" + ) + } + + // --- Edge cases --- + + @Test + fun `zero frames returns empty`() { + val result = AudioResampler.resample(ShortArray(0), 0, 48000, 16000, 1) + assertEquals(0, result.frameCount) + } + + @Test + fun `single frame upsample`() { + val src = shortArrayOf(1000) + val result = AudioResampler.resample(src, 1, 8000, 48000, 1) + assertEquals(6, result.frameCount) + } + + // --- Helpers --- + + private fun countZeroCrossings(samples: ShortArray, count: Int): Int { + var crossings = 0 + for (i in 1 until count) { + if ((samples[i - 1] >= 0 && samples[i] < 0) || + (samples[i - 1] < 0 && samples[i] >= 0) + ) { + crossings++ + } + } + return crossings + } + + private fun rms(samples: ShortArray, count: Int): Double { + if (count == 0) return 0.0 + var sumSq = 0.0 + for (i in 0 until count) { + sumSq += samples[i].toDouble() * samples[i].toDouble() + } + return kotlin.math.sqrt(sumSq / count) + } +}