From f0697d6dde2067bbb3b84ee3f8c061b712064949 Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 24 Feb 2026 02:36:58 +0900
Subject: [PATCH 01/10] add changeset

---
 .changes/pre-connect-audio-web | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 .changes/pre-connect-audio-web
diff --git a/.changes/pre-connect-audio-web b/.changes/pre-connect-audio-web
new file mode 100644
index 000000000..ecf31ff9c
--- /dev/null
+++ b/.changes/pre-connect-audio-web
@@ -0,0 +1 @@
+minor type="added" "Add web support for pre-connect audio buffer via AudioWorklet"

From 9267e888e9d2e61984b7694681aeea58fa776699 Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 24 Feb 2026 03:11:13 +0900
Subject: [PATCH 02/10] downgrade pre-connect changeset to patch and shorten description

---
 .changes/pre-connect-audio-web | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.changes/pre-connect-audio-web b/.changes/pre-connect-audio-web
index ecf31ff9c..d21408104 100644
--- a/.changes/pre-connect-audio-web
+++ b/.changes/pre-connect-audio-web
@@ -1 +1 @@
-minor type="added" "Add web support for pre-connect audio buffer via AudioWorklet"
+patch type="added" "Add web support for pre-connect audio buffer"

From 51fde82792a373425db7da997725ef8b1e57b39e Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 24 Feb 2026 03:02:41 +0900
Subject: [PATCH 03/10] add addAudioRenderer API to AudioTrack for raw PCM frames

---
 lib/livekit_client.dart                     |  1 +
 lib/src/preconnect/audio_frame_capture.dart |  3 +
 lib/src/track/local/local.dart              | 71 +++++++++++++++++++++
 3 files changed, 75 insertions(+)

diff --git a/lib/livekit_client.dart b/lib/livekit_client.dart
index 27276733b..86b6cb607 100644
--- a/lib/livekit_client.dart
+++ b/lib/livekit_client.dart
@@ -41,6 +41,7 @@ export 'src/agent/room_agent.dart';
 export 'src/participant/local.dart';
 export 'src/participant/participant.dart';
 export 'src/participant/remote.dart' hide ParticipantCreationResult;
+export 'src/preconnect/audio_frame_capture.dart' show AudioFrame, AudioFrameCallback;
 export 'src/preconnect/pre_connect_audio_buffer.dart';
 export 'src/publication/local.dart';
 export 'src/publication/remote.dart';
diff --git a/lib/src/preconnect/audio_frame_capture.dart b/lib/src/preconnect/audio_frame_capture.dart
index ba3f983a3..c36ef1444 100644
--- a/lib/src/preconnect/audio_frame_capture.dart
+++ b/lib/src/preconnect/audio_frame_capture.dart
@@ -56,5 +56,8 @@ abstract class AudioFrameCapture {
   Future<void> stop();
 }
 
+/// Callback for receiving audio frames.
+typedef AudioFrameCallback = void Function(AudioFrame frame);
+
 /// Factory that returns the platform-appropriate implementation.
 AudioFrameCapture createAudioFrameCapture() => createAudioFrameCaptureImpl();
diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart
index 2ce82883b..079968c26 100644
--- a/lib/src/track/local/local.dart
+++ b/lib/src/track/local/local.dart
@@ -33,6 +33,7 @@ import '../processor.dart';
 import '../processor_native.dart' if (dart.library.js_interop) '../processor_web.dart';
 import '../remote/audio.dart';
 import '../remote/video.dart';
+import '../../preconnect/audio_frame_capture.dart';
 import '../track.dart';
 import 'audio.dart';
 import 'video.dart';
@@ -60,6 +61,74 @@ mixin VideoTrack on Track {
 
 /// Used to group [LocalAudioTrack] and [RemoteAudioTrack].
 mixin AudioTrack on Track {
+  AudioFrameCapture? _audioCapture;
+  final List<AudioFrameCallback> _audioRenderers = [];
+  StreamSubscription? _audioFrameSubscription;
+
+  /// Register a callback to receive raw PCM audio frames from this track.
+  ///
+  /// Returns a function that, when called, removes this renderer.
+  /// When the last renderer is removed, audio capture stops automatically.
+  CancelListenFunc addAudioRenderer({
+    required AudioFrameCallback onFrame,
+    int sampleRate = 24000,
+    int channels = 1,
+    String commonFormat = 'int16',
+  }) {
+    _audioRenderers.add(onFrame);
+
+    if (_audioRenderers.length == 1) {
+      _startAudioCapture(
+        sampleRate: sampleRate,
+        channels: channels,
+        commonFormat: commonFormat,
+      );
+    }
+
+    return () async {
+      _audioRenderers.remove(onFrame);
+      if (_audioRenderers.isEmpty) {
+        await _stopAudioCapture();
+      }
+    };
+  }
+
+  Future<void> _startAudioCapture({
+    required int sampleRate,
+    required int channels,
+    required String commonFormat,
+  }) async {
+    final capture = createAudioFrameCapture();
+    _audioCapture = capture;
+    final rendererId = Track.uuid.v4();
+
+    final result = await capture.start(
+      track: mediaStreamTrack,
+      rendererId: rendererId,
+      sampleRate: sampleRate,
+      channels: channels,
+      commonFormat: commonFormat,
+    );
+
+    if (!result) {
+      logger.warning('Failed to start audio capture for renderer');
+      return;
+    }
+
+    _audioFrameSubscription = capture.frameStream.listen((frame) {
+      for (final renderer in List.of(_audioRenderers)) {
+        renderer(frame);
+      }
+    });
+  }
+
+  Future<void> _stopAudioCapture() async {
+    await _audioFrameSubscription?.cancel();
+    _audioFrameSubscription = null;
+    await _audioCapture?.stop();
+    _audioCapture = null;
+  }
+
   @override
   Future<void> onStarted() async {
     logger.fine('AudioTrack.onStarted()');
@@ -68,6 +137,8 @@ mixin AudioTrack on Track {
   @override
   Future<void> onStopped() async {
     logger.fine('AudioTrack.onStopped()');
+    await _stopAudioCapture();
+    _audioRenderers.clear();
   }
 }
 

From 349b3ebe4647d89672c107375d73a3dfc963af1d Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 24 Feb 2026 03:05:22 +0900
Subject: [PATCH 04/10] refactor: move audio frame capture and PCM utils to lib/src/audio

---
 lib/livekit_client.dart                                       | 2 +-
 lib/src/{preconnect => audio}/audio_frame_capture.dart        | 0
 lib/src/{preconnect => audio}/audio_frame_capture_native.dart | 0
 lib/src/{preconnect => audio}/audio_frame_capture_web.dart    | 2 +-
 lib/src/{support => audio}/audio_pcm_utils.dart               | 0
 lib/src/preconnect/pre_connect_audio_buffer.dart              | 2 +-
 lib/src/track/local/local.dart                                | 2 +-
 test/preconnect/audio_frame_capture_test.dart                 | 4 ++--
 8 files changed, 6 insertions(+), 6 deletions(-)
 rename lib/src/{preconnect => audio}/audio_frame_capture.dart (100%)
 rename lib/src/{preconnect => audio}/audio_frame_capture_native.dart (100%)
 rename lib/src/{preconnect => audio}/audio_frame_capture_web.dart (99%)
 rename lib/src/{support => audio}/audio_pcm_utils.dart (100%)

diff --git a/lib/livekit_client.dart b/lib/livekit_client.dart
index 86b6cb607..a16aac6dd 100644
--- a/lib/livekit_client.dart
+++ b/lib/livekit_client.dart
@@ -41,7 +41,7 @@ export 'src/agent/room_agent.dart';
 export 'src/participant/local.dart';
 export 'src/participant/participant.dart';
 export 'src/participant/remote.dart' hide ParticipantCreationResult;
-export 'src/preconnect/audio_frame_capture.dart' show AudioFrame, AudioFrameCallback;
+export 'src/audio/audio_frame_capture.dart' show AudioFrame, AudioFrameCallback;
 export 'src/preconnect/pre_connect_audio_buffer.dart';
 export 'src/publication/local.dart';
 export 'src/publication/remote.dart';
diff --git a/lib/src/preconnect/audio_frame_capture.dart b/lib/src/audio/audio_frame_capture.dart
similarity index 100%
rename from lib/src/preconnect/audio_frame_capture.dart
rename to lib/src/audio/audio_frame_capture.dart
diff --git a/lib/src/preconnect/audio_frame_capture_native.dart b/lib/src/audio/audio_frame_capture_native.dart
similarity index 100%
rename from lib/src/preconnect/audio_frame_capture_native.dart
rename to lib/src/audio/audio_frame_capture_native.dart
diff --git a/lib/src/preconnect/audio_frame_capture_web.dart b/lib/src/audio/audio_frame_capture_web.dart
similarity index 99%
rename from lib/src/preconnect/audio_frame_capture_web.dart
rename to lib/src/audio/audio_frame_capture_web.dart
index a44e9b4eb..7bc21ee56 100644
--- a/lib/src/preconnect/audio_frame_capture_web.dart
+++ b/lib/src/audio/audio_frame_capture_web.dart
@@ -22,8 +22,8 @@ import 'package:flutter_webrtc/flutter_webrtc.dart' show MediaStreamTrack;
 import 'package:web/web.dart' as web;
 
 import '../logger.dart';
-import '../support/audio_pcm_utils.dart';
 import 'audio_frame_capture.dart';
+import 'audio_pcm_utils.dart';
 
 /// JavaScript source for the AudioWorkletProcessor.
 ///
diff --git a/lib/src/support/audio_pcm_utils.dart b/lib/src/audio/audio_pcm_utils.dart
similarity index 100%
rename from lib/src/support/audio_pcm_utils.dart
rename to lib/src/audio/audio_pcm_utils.dart
diff --git a/lib/src/preconnect/pre_connect_audio_buffer.dart b/lib/src/preconnect/pre_connect_audio_buffer.dart
index 0dedf25ea..ae222dc43 100644
--- a/lib/src/preconnect/pre_connect_audio_buffer.dart
+++ b/lib/src/preconnect/pre_connect_audio_buffer.dart
@@ -19,6 +19,7 @@ import 'package:flutter/foundation.dart' show kIsWeb;
 import 'package:flutter_webrtc/flutter_webrtc.dart' as webrtc;
 import 'package:uuid/uuid.dart';
 
+import '../audio/audio_frame_capture.dart';
 import '../core/room.dart';
 import '../events.dart';
 import '../logger.dart';
@@ -29,7 +30,6 @@ import '../track/local/audio.dart';
 import '../types/data_stream.dart';
 import '../types/other.dart';
 import '../types/participant_state.dart';
-import 'audio_frame_capture.dart';
 
 typedef PreConnectOnError = void Function(Object error);
 
diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart
index 079968c26..baa39ca93 100644
--- a/lib/src/track/local/local.dart
+++ b/lib/src/track/local/local.dart
@@ -20,6 +20,7 @@ import 'package:flutter/material.dart';
 import 'package:flutter_webrtc/flutter_webrtc.dart' as rtc;
 import 'package:meta/meta.dart';
 
+import '../../audio/audio_frame_capture.dart';
 import '../../events.dart';
 import '../../exceptions.dart';
 import '../../extensions.dart';
@@ -33,7 +34,6 @@ import '../processor.dart';
 import '../processor_native.dart' if (dart.library.js_interop) '../processor_web.dart';
 import '../remote/audio.dart';
 import '../remote/video.dart';
-import '../../preconnect/audio_frame_capture.dart';
 import '../track.dart';
 import 'audio.dart';
 import 'video.dart';
diff --git a/test/preconnect/audio_frame_capture_test.dart b/test/preconnect/audio_frame_capture_test.dart
index d4a536825..1183b10f6 100644
--- a/test/preconnect/audio_frame_capture_test.dart
+++ b/test/preconnect/audio_frame_capture_test.dart
@@ -3,8 +3,8 @@ import 'dart:typed_data';
 
 import 'package:flutter_test/flutter_test.dart';
 
-import 'package:livekit_client/src/preconnect/audio_frame_capture.dart';
-import 'package:livekit_client/src/support/audio_pcm_utils.dart';
+import 'package:livekit_client/src/audio/audio_frame_capture.dart';
+import 'package:livekit_client/src/audio/audio_pcm_utils.dart';
 import 'package:livekit_client/src/support/byte_ring_buffer.dart';
 
 /// A mock AudioFrameCapture that emits frames from a StreamController.

From f992e7bd4bcc171983354ba4e54e15282f868baa Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 3 Mar 2026 00:12:20 +0900
Subject: [PATCH 05/10] fix: wrap _startAudioCapture in unawaited to fix
 discarded_futures lint

---
 lib/src/track/local/local.dart | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart
index baa39ca93..a84e41da7 100644
--- a/lib/src/track/local/local.dart
+++ b/lib/src/track/local/local.dart
@@ -78,11 +78,11 @@ mixin AudioTrack on Track {
     _audioRenderers.add(onFrame);
 
     if (_audioRenderers.length == 1) {
-      _startAudioCapture(
+      unawaited(_startAudioCapture(
         sampleRate: sampleRate,
         channels: channels,
         commonFormat: commonFormat,
-      );
+      ));
     }
 
     return () async {

From 4ce076574c9e04a8c5f099aa75339e3827a5be78 Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 3 Mar 2026 13:15:42 +0900
Subject: [PATCH 06/10] replace pre-connect changeset with audio renderer API changeset

---
 .changes/audio-renderer-api    | 1 +
 .changes/pre-connect-audio-web | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 .changes/audio-renderer-api
 delete mode 100644 .changes/pre-connect-audio-web

diff --git a/.changes/audio-renderer-api b/.changes/audio-renderer-api
new file mode 100644
index 000000000..ab40d5c28
--- /dev/null
+++ b/.changes/audio-renderer-api
@@ -0,0 +1 @@
+minor type="added" "Add audio renderer API for receiving raw audio frames"
diff --git a/.changes/pre-connect-audio-web b/.changes/pre-connect-audio-web
deleted file mode 100644
index d21408104..000000000
--- a/.changes/pre-connect-audio-web
+++ /dev/null
@@ -1 +0,0 @@
-patch type="added" "Add web support for pre-connect audio buffer"

From db68a20271244882b80dc70774d9215f01ccfbc1 Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 3 Mar 2026 13:21:23 +0900
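Subject: [PATCH 07/10] add AudioRendererOptions and AudioFormat enum

Replace the String commonFormat parameter with the AudioFormat enum and
group renderers by AudioRendererOptions so that each distinct options
config gets its own capture pipeline.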
---
 lib/livekit_client.dart                       |  2 +-
 lib/src/audio/audio_frame_capture.dart        | 39 ++++++++-
 lib/src/audio/audio_frame_capture_native.dart |  7 +-
 lib/src/audio/audio_frame_capture_web.dart    | 10 +--
 .../preconnect/pre_connect_audio_buffer.dart  |  2 +-
 lib/src/track/local/local.dart                | 82 ++++++++++---------
 test/preconnect/audio_frame_capture_test.dart | 34 ++++----
 7 files changed, 107 insertions(+), 69 deletions(-)

diff --git a/lib/livekit_client.dart b/lib/livekit_client.dart
index a16aac6dd..cb0408d67 100644
--- a/lib/livekit_client.dart
+++ b/lib/livekit_client.dart
@@ -41,7 +41,7 @@ export 'src/agent/room_agent.dart';
 export 'src/participant/local.dart';
 export 'src/participant/participant.dart';
 export 'src/participant/remote.dart' hide ParticipantCreationResult;
-export 'src/audio/audio_frame_capture.dart' show AudioFrame, AudioFrameCallback;
+export 'src/audio/audio_frame_capture.dart' show AudioFormat, AudioFrame, AudioFrameCallback, AudioRendererOptions;
 export 'src/preconnect/pre_connect_audio_buffer.dart';
 export 'src/publication/local.dart';
 export 'src/publication/remote.dart';
diff --git a/lib/src/audio/audio_frame_capture.dart b/lib/src/audio/audio_frame_capture.dart
index c36ef1444..b92f776ac 100644
--- a/lib/src/audio/audio_frame_capture.dart
+++ b/lib/src/audio/audio_frame_capture.dart
@@ -18,18 +18,27 @@ import 'package:flutter_webrtc/flutter_webrtc.dart' show MediaStreamTrack;
 
 import 'audio_frame_capture_native.dart' if (dart.library.js_interop) 'audio_frame_capture_web.dart';
 
+/// PCM sample format for audio frame capture.
+enum AudioFormat {
+  Int16('int16'),
+  Float32('float32');
+
+  final String value;
+  const AudioFormat(this.value);
+}
+
 /// A single frame of raw PCM audio data.
 class AudioFrame {
   final int sampleRate;
   final int channels;
   final Uint8List data;
-  final String commonFormat;
+  final AudioFormat format;
 
   const AudioFrame({
     required this.sampleRate,
     required this.channels,
     required this.data,
-    required this.commonFormat,
+    required this.format,
   });
 }
 
@@ -49,7 +58,7 @@ abstract class AudioFrameCapture {
     required String rendererId,
     required int sampleRate,
     required int channels,
-    required String commonFormat,
+    required AudioFormat format,
   });
 
   /// Stop capturing and release resources.
@@ -59,5 +68,29 @@ abstract class AudioFrameCapture {
 /// Callback for receiving audio frames.
 typedef AudioFrameCallback = void Function(AudioFrame frame);
 
+/// Options for configuring audio frame capture format.
+class AudioRendererOptions {
+  final int sampleRate;
+  final int channels;
+  final AudioFormat format;
+
+  const AudioRendererOptions({
+    this.sampleRate = 24000,
+    this.channels = 1,
+    this.format = AudioFormat.Int16,
+  });
+
+  @override
+  bool operator ==(Object other) =>
+      identical(this, other) ||
+      other is AudioRendererOptions &&
+          sampleRate == other.sampleRate &&
+          channels == other.channels &&
+          format == other.format;
+
+  @override
+  int get hashCode => Object.hash(sampleRate, channels, format);
+}
+
 /// Factory that returns the platform-appropriate implementation.
 AudioFrameCapture createAudioFrameCapture() => createAudioFrameCaptureImpl();
diff --git a/lib/src/audio/audio_frame_capture_native.dart b/lib/src/audio/audio_frame_capture_native.dart
index bea7587b2..a7b06a1d9 100644
--- a/lib/src/audio/audio_frame_capture_native.dart
+++ b/lib/src/audio/audio_frame_capture_native.dart
@@ -39,13 +39,13 @@ class AudioFrameCaptureNative implements AudioFrameCapture {
     required String rendererId,
     required int sampleRate,
     required int channels,
-    required String commonFormat,
+    required AudioFormat format,
   }) async {
     final result = await Native.startAudioRenderer(
       trackId: track.id!,
       rendererId: rendererId,
       format: {
-        'commonFormat': commonFormat,
+        'commonFormat': format.value,
         'sampleRate': sampleRate,
         'channels': channels,
       },
@@ -58,11 +58,12 @@ class AudioFrameCaptureNative implements AudioFrameCapture {
     _eventChannel = EventChannel('io.livekit.audio.renderer/channel-$rendererId');
     _streamSubscription = _eventChannel?.receiveBroadcastStream().listen((event) {
       try {
+        final rawFormat = event['commonFormat'] as String?;
         _controller.add(AudioFrame(
           sampleRate: event['sampleRate'] as int,
           channels: event['channels'] as int,
           data: event['data'] as Uint8List,
-          commonFormat: (event['commonFormat'] as String?) ?? commonFormat,
+          format: rawFormat == AudioFormat.Float32.value ? AudioFormat.Float32 : AudioFormat.Int16,
         ));
       } catch (e) {
         logger.warning('[AudioFrameCapture] Error parsing native event: $e');
diff --git a/lib/src/audio/audio_frame_capture_web.dart b/lib/src/audio/audio_frame_capture_web.dart
index 7bc21ee56..2e7ad4581 100644
--- a/lib/src/audio/audio_frame_capture_web.dart
+++ b/lib/src/audio/audio_frame_capture_web.dart
@@ -63,7 +63,7 @@ class AudioFrameCaptureWeb implements AudioFrameCapture {
   web.AudioWorkletNode? _workletNode;
   web.AudioNode? _sourceNode;
   StreamController<AudioFrame>? _controller;
-  String _targetFormat = 'int16';
+  AudioFormat _targetFormat = AudioFormat.Int16;
   int _targetChannels = 1;
 
   @override
@@ -75,9 +75,9 @@ class AudioFrameCaptureWeb implements AudioFrameCapture {
     required String rendererId,
     required int sampleRate,
     required int channels,
-    required String commonFormat,
+    required AudioFormat format,
   }) async {
-    _targetFormat = commonFormat;
+    _targetFormat = format;
     _targetChannels = channels;
     _controller ??= StreamController<AudioFrame>.broadcast();
 
@@ -144,7 +144,7 @@ class AudioFrameCaptureWeb implements AudioFrameCapture {
       final srcFloat32 = samplesBuffer.asFloat32List();
 
       final Uint8List bytes;
-      if (_targetFormat == 'float32') {
+      if (_targetFormat == AudioFormat.Float32) {
         bytes = float32ToFloat32Bytes(srcFloat32, channels, outChannels, frames);
       } else {
         bytes = float32ToInt16Bytes(srcFloat32, channels, outChannels, frames);
@@ -154,7 +154,7 @@ class AudioFrameCaptureWeb implements AudioFrameCapture {
         sampleRate: actualSampleRate,
         channels: outChannels,
         data: bytes,
-        commonFormat: _targetFormat,
+        format: _targetFormat,
       ));
     } catch (e) {
       logger.warning('[AudioFrameCapture] Error processing worklet frame: $e');
diff --git a/lib/src/preconnect/pre_connect_audio_buffer.dart b/lib/src/preconnect/pre_connect_audio_buffer.dart
index ae222dc43..47b3f22e4 100644
--- a/lib/src/preconnect/pre_connect_audio_buffer.dart
+++ b/lib/src/preconnect/pre_connect_audio_buffer.dart
@@ -132,7 +132,7 @@ class PreConnectAudioBuffer {
       rendererId: rendererId,
       sampleRate: _requestSampleRate,
       channels: 1,
-      commonFormat: 'int16',
+      format: AudioFormat.Int16,
     );
 
     if (!result) {
diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart
index a84e41da7..db3b0550b 100644
--- a/lib/src/track/local/local.dart
+++ b/lib/src/track/local/local.dart
@@ -61,53 +61,49 @@ mixin VideoTrack on Track {
 
 /// Used to group [LocalAudioTrack] and [RemoteAudioTrack].
 mixin AudioTrack on Track {
-  AudioFrameCapture? _audioCapture;
-  final List<AudioFrameCallback> _audioRenderers = [];
-  StreamSubscription? _audioFrameSubscription;
+  final Map<AudioRendererOptions, _AudioCaptureGroup> _captureGroups = {};
 
   /// Register a callback to receive raw PCM audio frames from this track.
   ///
+  /// Multiple renderers with different [options] each get their own capture
+  /// pipeline. Renderers sharing the same options share a single capture.
+  ///
   /// Returns a function that, when called, removes this renderer.
-  /// When the last renderer is removed, audio capture stops automatically.
+  /// When the last renderer for a given options config is removed, that
+  /// capture stops automatically.
   CancelListenFunc addAudioRenderer({
     required AudioFrameCallback onFrame,
-    int sampleRate = 24000,
-    int channels = 1,
-    String commonFormat = 'int16',
+    AudioRendererOptions options = const AudioRendererOptions(),
   }) {
-    _audioRenderers.add(onFrame);
-
-    if (_audioRenderers.length == 1) {
-      unawaited(_startAudioCapture(
-        sampleRate: sampleRate,
-        channels: channels,
-        commonFormat: commonFormat,
-      ));
-    }
+    final group = _captureGroups.putIfAbsent(options, () {
+      final g = _AudioCaptureGroup();
+      unawaited(_startCaptureGroup(g, options));
+      return g;
+    });
+    group.renderers.add(onFrame);
 
     return () async {
-      _audioRenderers.remove(onFrame);
-      if (_audioRenderers.isEmpty) {
-        await _stopAudioCapture();
+      group.renderers.remove(onFrame);
+      if (group.renderers.isEmpty) {
+        _captureGroups.remove(options);
+        await _stopCaptureGroup(group);
       }
     };
   }
 
-  Future<void> _startAudioCapture({
-    required int sampleRate,
-    required int channels,
-    required String commonFormat,
-  }) async {
+  Future<void> _startCaptureGroup(
+    _AudioCaptureGroup group,
+    AudioRendererOptions options,
+  ) async {
     final capture = createAudioFrameCapture();
-    _audioCapture = capture;
-    final rendererId = Track.uuid.v4();
+    group.capture = capture;
 
     final result = await capture.start(
       track: mediaStreamTrack,
-      rendererId: rendererId,
-      sampleRate: sampleRate,
-      channels: channels,
-      commonFormat: commonFormat,
+      rendererId: Track.uuid.v4(),
+      sampleRate: options.sampleRate,
+      channels: options.channels,
+      format: options.format,
     );
 
     if (!result) {
@@ -115,18 +111,18 @@ mixin AudioTrack on Track {
       return;
     }
 
-    _audioFrameSubscription = capture.frameStream.listen((frame) {
-      for (final renderer in List.of(_audioRenderers)) {
+    group.subscription = capture.frameStream.listen((frame) {
+      for (final renderer in List.of(group.renderers)) {
         renderer(frame);
       }
     });
   }
 
-  Future<void> _stopAudioCapture() async {
-    await _audioFrameSubscription?.cancel();
-    _audioFrameSubscription = null;
-    await _audioCapture?.stop();
-    _audioCapture = null;
+  Future<void> _stopCaptureGroup(_AudioCaptureGroup group) async {
+    await group.subscription?.cancel();
+    group.subscription = null;
+    await group.capture?.stop();
+    group.capture = null;
   }
 
   @override
@@ -137,11 +133,19 @@ mixin AudioTrack on Track {
   @override
   Future<void> onStopped() async {
     logger.fine('AudioTrack.onStopped()');
-    await _stopAudioCapture();
-    _audioRenderers.clear();
+    for (final group in _captureGroups.values) {
+      await _stopCaptureGroup(group);
+    }
+    _captureGroups.clear();
   }
 }
 
+class _AudioCaptureGroup {
+  AudioFrameCapture? capture;
+  StreamSubscription? subscription;
+  final List<AudioFrameCallback> renderers = [];
+}
+
 /// Base class for [LocalAudioTrack] and [LocalVideoTrack].
 abstract class LocalTrack extends Track {
   /// Options used for this track
diff --git a/test/preconnect/audio_frame_capture_test.dart b/test/preconnect/audio_frame_capture_test.dart
index 1183b10f6..384975432 100644
--- a/test/preconnect/audio_frame_capture_test.dart
+++ b/test/preconnect/audio_frame_capture_test.dart
@@ -22,7 +22,7 @@ class MockAudioFrameCapture implements AudioFrameCapture {
     required String rendererId,
     required int sampleRate,
     required int channels,
-    required String commonFormat,
+    required AudioFormat format,
   }) async {
     started = true;
     return true;
@@ -75,12 +75,12 @@ void main() {
         sampleRate: 48000,
         channels: 1,
         data: int16Bytes([100, -200, 300]),
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       );
 
       expect(frame.sampleRate, 48000);
       expect(frame.channels, 1);
-      expect(frame.commonFormat, 'int16');
+      expect(frame.format, AudioFormat.Int16);
       expect(frame.data.length, 6); // 3 samples * 2 bytes
     });
   });
@@ -93,7 +93,7 @@ void main() {
         rendererId: 'test-id',
         sampleRate: 24000,
         channels: 1,
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       );
 
       expect(result, true);
@@ -107,7 +107,7 @@ void main() {
         rendererId: 'test-id',
         sampleRate: 24000,
         channels: 1,
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       );
 
       final frames = <AudioFrame>[];
@@ -117,14 +117,14 @@ void main() {
         sampleRate: 24000,
         channels: 1,
         data: int16Bytes([1000, -1000]),
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       ));
 
       capture.emitFrame(AudioFrame(
         sampleRate: 24000,
         channels: 1,
         data: int16Bytes([2000, -2000]),
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       ));
 
       // Let microtasks run.
@@ -145,7 +145,7 @@ void main() {
         rendererId: 'test-id',
         sampleRate: 24000,
         channels: 1,
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       );
       await capture.stop();
 
@@ -253,7 +253,7 @@ void main() {
         rendererId: 'test-id',
         sampleRate: 24000,
         channels: 1,
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       );
 
       int? capturedSampleRate;
@@ -272,7 +272,7 @@ void main() {
           sampleRate: 48000,
           channels: 1,
           data: int16Bytes(samples),
-          commonFormat: 'int16',
+          format: AudioFormat.Int16,
         ));
       }
 
@@ -302,7 +302,7 @@ void main() {
         rendererId: 'test-id',
         sampleRate: 24000,
         channels: 1,
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       );
 
       bool overflowed = false;
@@ -317,13 +317,13 @@ void main() {
         sampleRate: 24000,
         channels: 1,
         data: Uint8List(60),
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       ));
       capture.emitFrame(AudioFrame(
         sampleRate: 24000,
         channels: 1,
         data: Uint8List(60),
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       ));
 
       await Future<void>.delayed(Duration.zero);
@@ -344,7 +344,7 @@ void main() {
         rendererId: 'test-id',
         sampleRate: 48000,
         channels: 1,
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       );
 
       // Simulate what the web implementation does: receive float32 from
@@ -361,7 +361,7 @@ void main() {
         sampleRate: 48000,
         channels: 1,
         data: samples.buffer.asUint8List(),
-        commonFormat: 'float32',
+        format: AudioFormat.Float32,
       ));
 
       await Future<void>.delayed(Duration.zero);
@@ -392,7 +392,7 @@ void main() {
         rendererId: 'test-id',
         sampleRate: 48000,
         channels: 1,
-        commonFormat: 'int16',
+        format: AudioFormat.Int16,
       );
 
       final sub = capture.frameStream.listen((frame) {
@@ -408,7 +408,7 @@ void main() {
         sampleRate: 48000,
         channels: 2,
         data: stereo.buffer.asUint8List(),
-        commonFormat: 'float32',
+        format: AudioFormat.Float32,
       ));
 
       await Future<void>.delayed(Duration.zero);

From b66e4c2505bc8d23b8a14254409b137773b017d2 Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 3 Mar 2026 13:32:43 +0900
Subject: [PATCH 08/10] fix: await capture start before stopping to avoid start/stop race

---
 lib/src/track/local/local.dart | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart
index db3b0550b..afc464990 100644
--- a/lib/src/track/local/local.dart
+++ b/lib/src/track/local/local.dart
@@ -77,7 +77,7 @@ mixin AudioTrack on Track {
   }) {
     final group = _captureGroups.putIfAbsent(options, () {
       final g = _AudioCaptureGroup();
-      unawaited(_startCaptureGroup(g, options));
+      g.startFuture = _startCaptureGroup(g, options);
       return g;
     });
     group.renderers.add(onFrame);
@@ -119,6 +119,7 @@ mixin AudioTrack on Track {
   }
 
   Future<void> _stopCaptureGroup(_AudioCaptureGroup group) async {
+    await group.startFuture;
     await group.subscription?.cancel();
     group.subscription = null;
     await group.capture?.stop();
@@ -143,6 +144,7 @@ mixin AudioTrack on Track {
 class _AudioCaptureGroup {
   AudioFrameCapture? capture;
   StreamSubscription? subscription;
+  Future<void>? startFuture;
   final List<AudioFrameCallback> renderers = [];
 }
 

From ddf31984fc81eef6574ea2ad473b82229ce18efc Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Tue, 3 Mar 2026 13:37:13 +0900
Subject: [PATCH 09/10] refactor: move capture start/stop logic into _AudioCaptureGroup

---
 lib/src/track/local/local.dart | 85 ++++++++++++++++++----------------
 1 file changed, 44 insertions(+), 41 deletions(-)

diff --git a/lib/src/track/local/local.dart b/lib/src/track/local/local.dart
index afc464990..9e42972d8 100644
--- a/lib/src/track/local/local.dart
+++ b/lib/src/track/local/local.dart
@@ -75,31 +75,55 @@ mixin AudioTrack on Track {
     required AudioFrameCallback onFrame,
     AudioRendererOptions options = const AudioRendererOptions(),
   }) {
-    final group = _captureGroups.putIfAbsent(options, () {
-      final g = _AudioCaptureGroup();
-      g.startFuture = _startCaptureGroup(g, options);
-      return g;
-    });
+    final group = _captureGroups.putIfAbsent(
+      options,
+      () => _AudioCaptureGroup(track: mediaStreamTrack, options: options),
+    );
     group.renderers.add(onFrame);
 
     return () async {
       group.renderers.remove(onFrame);
       if (group.renderers.isEmpty) {
         _captureGroups.remove(options);
-        await _stopCaptureGroup(group);
+        await group.stop();
       }
     };
   }
 
-  Future<void> _startCaptureGroup(
-    _AudioCaptureGroup group,
-    AudioRendererOptions options,
-  ) async {
+  @override
+  Future<void> onStarted() async {
+    logger.fine('AudioTrack.onStarted()');
+  }
+
+  @override
+  Future<void> onStopped() async {
+    logger.fine('AudioTrack.onStopped()');
+    for (final group in _captureGroups.values) {
+      await group.stop();
+    }
+    _captureGroups.clear();
+  }
+}
+
+class _AudioCaptureGroup {
+  final List<AudioFrameCallback> renderers = [];
+  late final Future<void> _startFuture;
+  AudioFrameCapture? _capture;
+  StreamSubscription? _subscription;
+
+  _AudioCaptureGroup({
+    required rtc.MediaStreamTrack track,
+    required AudioRendererOptions options,
+  }) {
+    _startFuture = _start(track, options);
+  }
+
+  Future<void> _start(rtc.MediaStreamTrack track, AudioRendererOptions options) async {
     final capture = createAudioFrameCapture();
-    group.capture = capture;
+    _capture = capture;
 
     final result = await capture.start(
-      track: mediaStreamTrack,
+      track: track,
       rendererId: Track.uuid.v4(),
       sampleRate: options.sampleRate,
       channels: options.channels,
@@ -111,43 +135,22 @@ mixin AudioTrack on Track {
       return;
     }
 
-    group.subscription = capture.frameStream.listen((frame) {
-      for (final renderer in List.of(group.renderers)) {
+    _subscription = capture.frameStream.listen((frame) {
+      for (final renderer in List.of(renderers)) {
         renderer(frame);
       }
     });
   }
 
-  Future<void> _stopCaptureGroup(_AudioCaptureGroup group) async {
-    await group.startFuture;
-    await group.subscription?.cancel();
-    group.subscription = null;
-    await group.capture?.stop();
-    group.capture = null;
-  }
-
-  @override
-  Future<void> onStarted() async {
-    logger.fine('AudioTrack.onStarted()');
-  }
-
-  @override
-  Future<void> onStopped() async {
-    logger.fine('AudioTrack.onStopped()');
-    for (final group in _captureGroups.values) {
-      await _stopCaptureGroup(group);
-    }
-    _captureGroups.clear();
+  Future<void> stop() async {
+    await _startFuture;
+    await _subscription?.cancel();
+    _subscription = null;
+    await _capture?.stop();
+    _capture = null;
   }
 }
 
-class _AudioCaptureGroup {
-  AudioFrameCapture? capture;
-  StreamSubscription? subscription;
-  Future<void>? startFuture;
-  final List<AudioFrameCallback> renderers = [];
-}
-
 /// Base class for [LocalAudioTrack] and [LocalVideoTrack].
 abstract class LocalTrack extends Track {
   /// Options used for this track

From 6218bb2c85ea34b61b347d8d49033325c194590f Mon Sep 17 00:00:00 2001
From: Hiroshi Horie <548776+hiroshihorie@users.noreply.github.com>
Date: Wed, 4 Mar 2026 11:39:00 +0900
Subject: [PATCH 10/10] android resampler

---
 .../kotlin/io/livekit/plugin/AudioRenderer.kt | 151 ++++++--------
 .../io/livekit/plugin/AudioResampler.kt       | 142 +++++++++++++
 .../io/livekit/plugin/AudioResamplerTest.kt   | 193 ++++++++++++++++++
 3 files changed, 394 insertions(+), 92 deletions(-)
 create mode 100644 android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt
 create mode 100644 android/src/test/kotlin/io/livekit/plugin/AudioResamplerTest.kt

diff --git a/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt b/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt
index 1138723bf..43b5646ba 100644
--- a/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt
+++ b/android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt
@@ -110,12 +110,9 @@ class AudioRenderer(
   }
 
   /**
-   * Converts audio data to raw interleaved bytes.
+   * Converts audio data to raw interleaved bytes with resampling.
    *
-   * If source and target channel counts match, data is copied directly.
-   * If target requests fewer channels, the first channels are kept and interleaved.
-   *
-   * Sends raw byte arrays instead of boxed sample lists.
+   * Pipeline: read int16 → resample → channel reduce → format convert (int16/float32)
    */
   private fun convertAudioData(
     audioData: ByteBuffer,
@@ -138,16 +135,7 @@ class AudioRenderer(
       return null
     }
 
-    val bytesPerSample = 2 // 16-bit
-    val bytesPerFrame = numberOfChannels * bytesPerSample
-    if (bytesPerFrame <= 0) {
-      logDroppedFrame("Invalid bytesPerFrame: $bytesPerFrame")
-      return null
-    }
-
-    val requestedChannels = targetFormat.numberOfChannels.coerceAtLeast(1)
-    val outChannels = requestedChannels.coerceAtMost(numberOfChannels)
-
+    val bytesPerFrame = numberOfChannels * 2
     val buffer = audioData.duplicate()
     buffer.order(ByteOrder.LITTLE_ENDIAN)
     buffer.rewind()
@@ -159,7 +147,7 @@ class AudioRenderer(
     }
 
     val expectedBytes = numberOfFrames.toLong() * bytesPerFrame.toLong()
-    val frameLength = if (expectedBytes <= availableBytes.toLong()) {
+    val srcFrames = if (expectedBytes <= availableBytes.toLong()) {
       numberOfFrames
     } else {
       val availableFrames = availableBytes / bytesPerFrame
@@ -173,24 +161,71 @@ class AudioRenderer(
       availableFrames
     }
 
+    // Step 1: Read source int16 samples into ShortArray
+    val src = ShortArray(srcFrames * numberOfChannels)
+    for (i in src.indices) {
+      src[i] = buffer.short
+    }
+
+    // Step 2: Resample to target sample rate
+    val resampleResult = AudioResampler.resample(
+      src, srcFrames, sampleRate, targetFormat.sampleRate, numberOfChannels
+    )
+    val resampled = resampleResult.samples
+    val outFrames = resampleResult.frameCount
+
+    if (outFrames <= 0) {
+      logDroppedFrame("Resampled frame count is 0")
+      return null
+    }
+
+    // Step 3: Channel reduction + format conversion
+    val requestedChannels = targetFormat.numberOfChannels.coerceAtLeast(1)
+    val outChannels = requestedChannels.coerceAtMost(numberOfChannels)
+
     val result = mutableMapOf<String, Any>(
-      "sampleRate" to sampleRate,
+      "sampleRate" to targetFormat.sampleRate,
       "channels" to outChannels,
-      "frameLength" to frameLength,
+      "frameLength" to outFrames,
     )
 
     when (targetFormat.commonFormat) {
-      "int16" -> {
-        result["commonFormat"] = "int16"
-        result["data"] = extractAsInt16Bytes(buffer, numberOfChannels, outChannels, frameLength)
-      }
       "float32" -> {
         result["commonFormat"] = "float32"
-        result["data"] = extractAsFloat32Bytes(buffer, numberOfChannels, outChannels, frameLength)
+        val out = ByteArray(outFrames * outChannels * 4)
+        val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
+        for (f in 0 until outFrames) {
+          for (ch in 0 until outChannels) {
+            val sample = resampled[f * numberOfChannels + ch].toFloat() / Short.MAX_VALUE
+            outBuf.putFloat((f * outChannels + ch) * 4, sample)
+          }
+        }
+        result["data"] = out
       }
       else -> {
         result["commonFormat"] = "int16"
-        result["data"] = extractAsInt16Bytes(buffer, numberOfChannels, outChannels, frameLength)
+        if (outChannels == numberOfChannels) {
+          // Fast path: no channel reduction — bulk copy resampled data
+          val out = ByteArray(outFrames * outChannels * 2)
+          val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
+          for (i in 0 until outFrames * outChannels) {
+            outBuf.putShort(i * 2, resampled[i])
+          }
+          result["data"] = out
+        } else {
+          // Channel reduction: keep first outChannels
+          val out = ByteArray(outFrames * outChannels * 2)
+          val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
+          for (f in 0 until outFrames) {
+            for (ch in 0 until outChannels) {
+              outBuf.putShort(
+                (f * outChannels + ch) * 2,
+                resampled[f * numberOfChannels + ch]
+              )
+            }
+          }
+          result["data"] = out
+        }
       }
     }
 
@@ -203,74 +238,6 @@ class AudioRenderer(
       Log.w(TAG, "Dropping audio frame #$droppedFrameCount for rendererId=$rendererId: $reason")
     }
   }
-
-  /**
-   * Extracts int16 PCM bytes from an int16 source buffer.
-   *
-   * Fast path when channel counts match (direct copy).
-   * Otherwise keeps only the first [outChannels] channels, interleaved.
-   */
-  private fun extractAsInt16Bytes(
-    buffer: ByteBuffer,
-    srcChannels: Int,
-    outChannels: Int,
-    numberOfFrames: Int
-  ): ByteArray {
-    // Fast path: matching channel count — bulk copy.
-    if (srcChannels == outChannels) {
-      val totalBytes = numberOfFrames * outChannels * 2
-      val out = ByteArray(totalBytes)
-      buffer.get(out, 0, totalBytes.coerceAtMost(buffer.remaining()))
-      return out
-    }
-
-    // Channel reduction: keep first outChannels.
-    val out = ByteArray(numberOfFrames * outChannels * 2)
-    val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
-
-    for (frame in 0 until numberOfFrames) {
-      val srcOffset = frame * srcChannels * 2
-      for (ch in 0 until outChannels) {
-        val byteIndex = srcOffset + ch * 2
-        if (byteIndex + 1 < buffer.capacity()) {
-          buffer.position(byteIndex)
-          outBuf.putShort((frame * outChannels + ch) * 2, buffer.short)
-        }
-      }
-    }
-
-    return out
-  }
-
-  /**
-   * Converts int16 PCM source to float32 bytes.
-   *
-   * Each int16 sample is scaled to the [-1.0, 1.0] range.
-   * Only the first [outChannels] channels are kept.
-   */
-  private fun extractAsFloat32Bytes(
-    buffer: ByteBuffer,
-    srcChannels: Int,
-    outChannels: Int,
-    numberOfFrames: Int
-  ): ByteArray {
-    val out = ByteArray(numberOfFrames * outChannels * 4)
-    val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
-
-    for (frame in 0 until numberOfFrames) {
-      val srcOffset = frame * srcChannels * 2
-      for (ch in 0 until outChannels) {
-        val byteIndex = srcOffset + ch * 2
-        if (byteIndex + 1 < buffer.capacity()) {
-          buffer.position(byteIndex)
-          val sampleFloat = buffer.short.toFloat() / Short.MAX_VALUE
-          outBuf.putFloat((frame * outChannels + ch) * 4, sampleFloat)
-        }
-      }
-    }
-
-    return out
-  }
 }
 
 /**
diff --git a/android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt b/android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt
new file mode 100644
index 000000000..f008bad77
--- /dev/null
+++ b/android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt
@@ -0,0 +1,142 @@
+/*
+ * Copyright 2024 LiveKit, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package io.livekit.plugin
+
+/**
+ * Stateless resampler for interleaved int16 PCM data.
+ *
+ * Every call is independent: no samples are carried over between calls.
+ *
+ * - Same rate: passthrough (returns input array as-is)
+ * - Upsampling: linear interpolation between adjacent samples
+ * - Downsampling: box filter (averages source samples per output sample) to prevent aliasing
+ */
+object AudioResampler {
+
+  /**
+   * Resample interleaved int16 PCM audio.
+   *
+   * @param src         Interleaved int16 samples (channels interleaved per frame)
+   * @param srcFrames   Number of frames in [src] (total samples = srcFrames * channels)
+   * @param srcRate     Source sample rate in Hz
+   * @param targetRate  Target sample rate in Hz
+   * @param channels    Number of interleaved channels
+   * @return Resampled interleaved samples and their frame count. [src] is returned unchanged
+   *   when the rates match or when [srcFrames] or [channels] is not positive; an empty result
+   *   is returned when a rate is not positive or no output frame would be produced.
+   */
+  fun resample(
+    src: ShortArray,
+    srcFrames: Int,
+    srcRate: Int,
+    targetRate: Int,
+    channels: Int
+  ): ResampleResult {
+    if (srcRate == targetRate || srcFrames <= 0 || channels <= 0) {
+      return ResampleResult(src, srcFrames)
+    }
+
+    // Validate the rates before dividing: srcRate == 0 would throw ArithmeticException, and
+    // a negative rate is meaningless, so both cases yield an empty result instead.
+    val ratesValid = srcRate > 0 && targetRate > 0
+    val outFrames = if (ratesValid) (srcFrames.toLong() * targetRate / srcRate).toInt() else 0
+    if (outFrames <= 0) {
+      return ResampleResult(ShortArray(0), 0)
+    }
+
+    val resampled = if (targetRate > srcRate) {
+      upsample(src, srcFrames, outFrames, channels)
+    } else {
+      downsample(src, srcFrames, outFrames, srcRate, targetRate, channels)
+    }
+
+    return ResampleResult(resampled, outFrames)
+  }
+
+  /**
+   * Linear interpolation upsampling.
+   *
+   * Output frames positioned past the last source frame repeat that frame: there is no
+   * later frame to interpolate towards, and extrapolating would overshoot the signal.
+   */
+  private fun upsample(src: ShortArray, srcFrames: Int, outFrames: Int, channels: Int): ShortArray {
+    // Edge case: single source frame — just repeat it
+    if (srcFrames <= 1) {
+      return ShortArray(outFrames * channels) { src[it % channels] }
+    }
+
+    val out = ShortArray(outFrames * channels)
+    val ratio = srcFrames.toDouble() / outFrames.toDouble()
+
+    for (f in 0 until outFrames) {
+      val srcPos = f * ratio
+      val idx = srcPos.toInt().coerceAtMost(srcFrames - 2)
+      // idx is clamped so that idx + 1 stays in range; clamp the weight too, otherwise the
+      // trailing frames (srcPos > srcFrames - 1) would be extrapolated with a weight above 1.
+      val frac = (srcPos - idx).coerceAtMost(1.0).toFloat()
+
+      for (ch in 0 until channels) {
+        val s0 = src[idx * channels + ch]
+        val s1 = src[(idx + 1) * channels + ch]
+        out[f * channels + ch] = (s0 + frac * (s1 - s0)).toInt()
+          .coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt())
+          .toShort()
+      }
+    }
+
+    return out
+  }
+
+  /**
+   * Box filter downsampling. Averages all source samples that map to each
+   * output sample, acting as a low-pass filter to prevent aliasing.
+   */
+  private fun downsample(
+    src: ShortArray, srcFrames: Int, outFrames: Int, srcRate: Int, targetRate: Int, channels: Int
+  ): ShortArray {
+    val out = ShortArray(outFrames * channels)
+
+    for (f in 0 until outFrames) {
+      // Exact integer window edges: consecutive windows tile the source, with none of the
+      // off-by-one gaps or overlaps that floating-point rounding could introduce.
+      val srcStart = (f.toLong() * srcRate / targetRate).toInt()
+      val srcEnd = ((f + 1L) * srcRate / targetRate).toInt().coerceAtMost(srcFrames)
+      val count = srcEnd - srcStart
+
+      for (ch in 0 until channels) {
+        var sum = 0L
+        for (i in srcStart until srcEnd) {
+          sum += src[i * channels + ch]
+        }
+        // The mean of int16 samples always fits in int16; the guard only protects the division.
+        out[f * channels + ch] = if (count > 0) (sum / count).toShort() else 0
+      }
+    }
+
+    return out
+  }
+
+  /** Resampled interleaved samples together with the number of frames they contain. */
+  data class ResampleResult(val samples: ShortArray, val frameCount: Int) {
+    // Arrays compare by reference by default; compare contents so results behave as values.
+    override fun equals(other: Any?): Boolean =
+      this === other || (other is ResampleResult &&
+        frameCount == other.frameCount && samples.contentEquals(other.samples))
+
+    override fun hashCode(): Int = 31 * samples.contentHashCode() + frameCount
+  }
+}
diff --git a/android/src/test/kotlin/io/livekit/plugin/AudioResamplerTest.kt b/android/src/test/kotlin/io/livekit/plugin/AudioResamplerTest.kt
new file mode 100644
index 000000000..df9c27acb
--- /dev/null
+++ b/android/src/test/kotlin/io/livekit/plugin/AudioResamplerTest.kt
@@ -0,0 +1,193 @@
+package io.livekit.plugin
+
+import kotlin.math.PI
+import kotlin.math.roundToInt
+import kotlin.math.sin
+import kotlin.test.Test
+import kotlin.test.assertEquals
+import kotlin.test.assertTrue
+
+class AudioResamplerTest {
+
+  // --- Passthrough ---
+
+  @Test
+  fun `same rate returns identical samples`() {
+    val src = shortArrayOf(100, 200, 300, 400, 500)
+    val result = AudioResampler.resample(src, 5, 48000, 48000, 1)
+    assertEquals(5, result.frameCount)
+    assertTrue(src.contentEquals(result.samples))
+  }
+
+  // --- Upsample ---
+
+  @Test
+  fun `upsample 8kHz to 48kHz produces 6x frames`() {
+    val srcFrames = 80
+    val src = ShortArray(srcFrames) { (it * 100).toShort() }
+    val result = AudioResampler.resample(src, srcFrames, 8000, 48000, 1)
+    assertEquals(480, result.frameCount)
+    assertEquals(480, result.samples.size)
+  }
+
+  @Test
+  fun `upsample preserves first and last sample`() {
+    val src = shortArrayOf(0, 1000, 2000, 3000)
+    val result = AudioResampler.resample(src, 4, 8000, 16000, 1)
+    assertEquals(0, result.samples[0].toInt())
+    // At 2x, output frame 6 lands exactly on the last source frame.
+    assertEquals(3000, result.samples[6].toInt())
+  }
+
+  @Test
+  fun `upsample with stereo preserves channel count`() {
+    // 4 frames, 2 channels: [L0, R0, L1, R1, L2, R2, L3, R3]
+    val src = shortArrayOf(100, -100, 200, -200, 300, -300, 400, -400)
+    val result = AudioResampler.resample(src, 4, 8000, 16000, 2)
+    assertEquals(8, result.frameCount)
+    assertEquals(16, result.samples.size) // 8 frames * 2 channels
+  }
+
+  // --- Downsample ---
+
+  @Test
+  fun `downsample 48kHz to 8kHz produces one-sixth frames`() {
+    val srcFrames = 480
+    val src = ShortArray(srcFrames) { 1000 }
+    val result = AudioResampler.resample(src, srcFrames, 48000, 8000, 1)
+    assertEquals(80, result.frameCount)
+    assertEquals(80, result.samples.size)
+    for (s in result.samples) {
+      assertEquals(1000, s.toInt())
+    }
+  }
+
+  @Test
+  fun `downsample averages samples correctly`() {
+    // 6 samples at 48kHz → 1 sample at 8kHz, should average the 6 values
+    val src = shortArrayOf(100, 200, 300, 400, 500, 600)
+    val result = AudioResampler.resample(src, 6, 48000, 8000, 1)
+    assertEquals(1, result.frameCount)
+    // Average of 100..600 = 350
+    assertEquals(350, result.samples[0].toInt())
+  }
+
+  // --- Sine wave preservation (signal below Nyquist survives) ---
+
+  @Test
+  fun `440Hz sine survives downsample from 48kHz to 16kHz`() {
+    val srcRate = 48000
+    val targetRate = 16000
+    val freq = 440.0
+    val durationSec = 0.1
+    val srcFrames = (srcRate * durationSec).toInt()
+
+    val src = ShortArray(srcFrames) { i ->
+      (sin(2.0 * PI * freq * i / srcRate) * 20000).roundToInt().toShort()
+    }
+
+    val result = AudioResampler.resample(src, srcFrames, srcRate, targetRate, 1)
+    val expectedFrames = (srcFrames.toLong() * targetRate / srcRate).toInt()
+    assertEquals(expectedFrames, result.frameCount)
+
+    // Estimate the frequency of the resampled signal from its zero crossings
+    // (two per cycle) and allow a 10% tolerance around 440Hz.
+    val zeroCrossings = countZeroCrossings(result.samples, result.frameCount)
+    val estimatedFreq = (zeroCrossings / 2.0) / durationSec
+    assertTrue(
+      estimatedFreq > freq * 0.9 && estimatedFreq < freq * 1.1,
+      "Expected ~440Hz, estimated ${estimatedFreq}Hz (zeroCrossings=$zeroCrossings)"
+    )
+  }
+
+  // --- Aliasing rejection (signal above Nyquist is attenuated) ---
+
+  @Test
+  fun `5kHz sine is attenuated when downsampled to 8kHz`() {
+    val srcRate = 48000
+    val targetRate = 8000 // Nyquist = 4kHz
+    val freq = 5000.0 // Above Nyquist
+    val durationSec = 0.05
+    val srcFrames = (srcRate * durationSec).toInt()
+
+    val src = ShortArray(srcFrames) { i ->
+      (sin(2.0 * PI * freq * i / srcRate) * 20000).roundToInt().toShort()
+    }
+
+    val result = AudioResampler.resample(src, srcFrames, srcRate, targetRate, 1)
+
+    // With box filter, the 5kHz signal should lose at least half of its RMS level.
+    val inputRms = rms(src, srcFrames)
+    val outputRms = rms(result.samples, result.frameCount)
+    assertTrue(
+      outputRms < inputRms * 0.5,
+      "5kHz signal should be attenuated: inputRms=$inputRms, outputRms=$outputRms, " +
+        "ratio=${outputRms / inputRms}"
+    )
+  }
+
+  @Test
+  fun `1kHz sine is preserved when downsampled to 8kHz`() {
+    val srcRate = 48000
+    val targetRate = 8000 // Nyquist = 4kHz
+    val freq = 1000.0 // Well below Nyquist
+    val durationSec = 0.05
+    val srcFrames = (srcRate * durationSec).toInt()
+
+    val src = ShortArray(srcFrames) { i ->
+      (sin(2.0 * PI * freq * i / srcRate) * 20000).roundToInt().toShort()
+    }
+
+    val result = AudioResampler.resample(src, srcFrames, srcRate, targetRate, 1)
+
+    // 1kHz is well below Nyquist, RMS should remain roughly similar (within 30%)
+    val inputRms = rms(src, srcFrames)
+    val outputRms = rms(result.samples, result.frameCount)
+
+    assertTrue(
+      outputRms > inputRms * 0.7,
+      "1kHz signal should be preserved: inputRms=$inputRms, outputRms=$outputRms, " +
+        "ratio=${outputRms / inputRms}"
+    )
+  }
+
+  // --- Edge cases ---
+
+  @Test
+  fun `zero frames returns empty`() {
+    val result = AudioResampler.resample(ShortArray(0), 0, 48000, 16000, 1)
+    assertEquals(0, result.frameCount)
+    assertEquals(0, result.samples.size)
+  }
+
+  @Test
+  fun `single frame upsample`() {
+    val src = shortArrayOf(1000)
+    val result = AudioResampler.resample(src, 1, 8000, 48000, 1)
+    assertEquals(6, result.frameCount)
+    assertTrue(result.samples.all { it.toInt() == 1000 })
+  }
+
+  // --- Helpers ---
+
+  private fun countZeroCrossings(samples: ShortArray, count: Int): Int {
+    var crossings = 0
+    for (i in 1 until count) {
+      if ((samples[i - 1] >= 0 && samples[i] < 0) ||
+        (samples[i - 1] < 0 && samples[i] >= 0)
+      ) {
+        crossings++
+      }
+    }
+    return crossings
+  }
+
+  private fun rms(samples: ShortArray, count: Int): Double {
+    if (count == 0) return 0.0
+    var sumSq = 0.0
+    for (i in 0 until count) {
+      sumSq += samples[i].toDouble() * samples[i].toDouble()
+    }
+    return kotlin.math.sqrt(sumSq / count)
+  }
+}