Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .changes/audio-renderer-api
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
minor type="added" "Add audio renderer API for receiving raw audio frames"
151 changes: 59 additions & 92 deletions android/src/main/kotlin/io/livekit/plugin/AudioRenderer.kt
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,9 @@ class AudioRenderer(
}

/**
* Converts audio data to raw interleaved bytes.
* Converts audio data to raw interleaved bytes with resampling.
*
* If source and target channel counts match, data is copied directly.
* If target requests fewer channels, the first channels are kept and interleaved.
*
* Sends raw byte arrays instead of boxed sample lists.
* Pipeline: read int16 → resample → channel reduce → format convert (int16/float32)
*/
private fun convertAudioData(
audioData: ByteBuffer,
Expand All @@ -138,16 +135,7 @@ class AudioRenderer(
return null
}

val bytesPerSample = 2 // 16-bit
val bytesPerFrame = numberOfChannels * bytesPerSample
if (bytesPerFrame <= 0) {
logDroppedFrame("Invalid bytesPerFrame: $bytesPerFrame")
return null
}

val requestedChannels = targetFormat.numberOfChannels.coerceAtLeast(1)
val outChannels = requestedChannels.coerceAtMost(numberOfChannels)

val bytesPerFrame = numberOfChannels * 2
val buffer = audioData.duplicate()
buffer.order(ByteOrder.LITTLE_ENDIAN)
buffer.rewind()
Expand All @@ -159,7 +147,7 @@ class AudioRenderer(
}

val expectedBytes = numberOfFrames.toLong() * bytesPerFrame.toLong()
val frameLength = if (expectedBytes <= availableBytes.toLong()) {
val srcFrames = if (expectedBytes <= availableBytes.toLong()) {
numberOfFrames
} else {
val availableFrames = availableBytes / bytesPerFrame
Expand All @@ -173,24 +161,71 @@ class AudioRenderer(
availableFrames
}

// Step 1: Read source int16 samples into ShortArray
val src = ShortArray(srcFrames * numberOfChannels)
for (i in src.indices) {
src[i] = buffer.short
}

// Step 2: Resample to target sample rate
val resampleResult = AudioResampler.resample(
src, srcFrames, sampleRate, targetFormat.sampleRate, numberOfChannels
)
val resampled = resampleResult.samples
val outFrames = resampleResult.frameCount

if (outFrames <= 0) {
logDroppedFrame("Resampled frame count is 0")
return null
}

// Step 3: Channel reduction + format conversion
val requestedChannels = targetFormat.numberOfChannels.coerceAtLeast(1)
val outChannels = requestedChannels.coerceAtMost(numberOfChannels)

val result = mutableMapOf<String, Any>(
"sampleRate" to sampleRate,
"sampleRate" to targetFormat.sampleRate,
"channels" to outChannels,
"frameLength" to frameLength,
"frameLength" to outFrames,
)

when (targetFormat.commonFormat) {
"int16" -> {
result["commonFormat"] = "int16"
result["data"] = extractAsInt16Bytes(buffer, numberOfChannels, outChannels, frameLength)
}
"float32" -> {
result["commonFormat"] = "float32"
result["data"] = extractAsFloat32Bytes(buffer, numberOfChannels, outChannels, frameLength)
val out = ByteArray(outFrames * outChannels * 4)
val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
for (f in 0 until outFrames) {
for (ch in 0 until outChannels) {
val sample = resampled[f * numberOfChannels + ch].toFloat() / Short.MAX_VALUE
outBuf.putFloat((f * outChannels + ch) * 4, sample)
}
}
result["data"] = out
}
else -> {
result["commonFormat"] = "int16"
result["data"] = extractAsInt16Bytes(buffer, numberOfChannels, outChannels, frameLength)
if (outChannels == numberOfChannels) {
// Fast path: no channel reduction — bulk copy resampled data
val out = ByteArray(outFrames * outChannels * 2)
val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
for (i in 0 until outFrames * outChannels) {
outBuf.putShort(i * 2, resampled[i])
}
result["data"] = out
} else {
// Channel reduction: keep first outChannels
val out = ByteArray(outFrames * outChannels * 2)
val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)
for (f in 0 until outFrames) {
for (ch in 0 until outChannels) {
outBuf.putShort(
(f * outChannels + ch) * 2,
resampled[f * numberOfChannels + ch]
)
}
}
result["data"] = out
}
}
}

Expand All @@ -203,74 +238,6 @@ class AudioRenderer(
Log.w(TAG, "Dropping audio frame #$droppedFrameCount for rendererId=$rendererId: $reason")
}
}

/**
 * Extracts int16 PCM bytes from an int16 source buffer.
 *
 * Fast path when channel counts match: bytes are bulk-copied from the buffer's
 * current position (at most `buffer.remaining()` bytes; any shortfall is left
 * as zeros in the returned array).
 *
 * Otherwise only the first [outChannels] channels of every frame are kept and
 * written interleaved as little-endian int16. In this path samples are read at
 * absolute byte offsets from the start of [buffer], using the buffer's own
 * byte order, and the buffer's position is not modified. Samples that lie
 * beyond the buffer's limit are skipped and stay zero in the output.
 *
 * @param buffer source int16 PCM data
 * @param srcChannels number of interleaved channels in [buffer]
 * @param outChannels number of leading channels to keep per frame
 * @param numberOfFrames number of frames to extract
 * @return a byte array of `numberOfFrames * outChannels * 2` bytes
 */
private fun extractAsInt16Bytes(
    buffer: ByteBuffer,
    srcChannels: Int,
    outChannels: Int,
    numberOfFrames: Int
): ByteArray {
    // Fast path: matching channel count — bulk copy.
    if (srcChannels == outChannels) {
        val totalBytes = numberOfFrames * outChannels * 2
        val out = ByteArray(totalBytes)
        buffer.get(out, 0, totalBytes.coerceAtMost(buffer.remaining()))
        return out
    }

    // Channel reduction: keep first outChannels.
    val out = ByteArray(numberOfFrames * outChannels * 2)
    val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)

    // Bound reads by the limit, not the capacity: bytes between limit and
    // capacity are not readable and a relative/absolute read there throws.
    val readableEnd = buffer.limit()

    for (frame in 0 until numberOfFrames) {
        val srcOffset = frame * srcChannels * 2
        for (ch in 0 until outChannels) {
            val byteIndex = srcOffset + ch * 2
            if (byteIndex + 1 < readableEnd) {
                // Absolute read: no need to move the buffer's position.
                outBuf.putShort((frame * outChannels + ch) * 2, buffer.getShort(byteIndex))
            }
        }
    }

    return out
}

/**
 * Converts int16 PCM source to float32 bytes.
 *
 * Each int16 sample is divided by [Short.MAX_VALUE] and the result is clamped
 * so that it never falls below -1.0 (without the clamp, -32768 would map to
 * roughly -1.00003). Only the first [outChannels] channels of every frame are
 * kept; output is interleaved little-endian float32.
 *
 * Samples are read at absolute byte offsets from the start of [buffer], using
 * the buffer's own byte order, and the buffer's position is not modified.
 * Samples that lie beyond the buffer's limit are skipped and stay 0.0 in the
 * output.
 *
 * @param buffer source int16 PCM data
 * @param srcChannels number of interleaved channels in [buffer]
 * @param outChannels number of leading channels to keep per frame
 * @param numberOfFrames number of frames to convert
 * @return a byte array of `numberOfFrames * outChannels * 4` bytes
 */
private fun extractAsFloat32Bytes(
    buffer: ByteBuffer,
    srcChannels: Int,
    outChannels: Int,
    numberOfFrames: Int
): ByteArray {
    val out = ByteArray(numberOfFrames * outChannels * 4)
    val outBuf = ByteBuffer.wrap(out).order(ByteOrder.LITTLE_ENDIAN)

    // Bound reads by the limit, not the capacity: bytes between limit and
    // capacity are not readable and a read there throws.
    val readableEnd = buffer.limit()

    for (frame in 0 until numberOfFrames) {
        val srcOffset = frame * srcChannels * 2
        for (ch in 0 until outChannels) {
            val byteIndex = srcOffset + ch * 2
            if (byteIndex + 1 < readableEnd) {
                // Same scale as before; only the single out-of-range value
                // (-32768 / 32767) is pulled back to -1.0.
                val sampleFloat = (buffer.getShort(byteIndex).toFloat() / Short.MAX_VALUE)
                    .coerceAtLeast(-1.0f)
                outBuf.putFloat((frame * outChannels + ch) * 4, sampleFloat)
            }
        }
    }

    return out
}
}

/**
Expand Down
142 changes: 142 additions & 0 deletions android/src/main/kotlin/io/livekit/plugin/AudioResampler.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Copyright 2024 LiveKit, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.livekit.plugin

/**
 * Pure audio resampler for interleaved int16 PCM data.
 *
 * - Same rate: passthrough (returns input array as-is)
 * - Upsampling: linear interpolation between adjacent source frames
 * - Downsampling: box filter (averages the source frames that map to each
 *   output frame), which acts as a simple low-pass filter
 *
 * The object holds no state: every call resamples its input independently of
 * any previous call.
 */
object AudioResampler {

    /**
     * Resample interleaved int16 PCM audio.
     *
     * The output frame count is `srcFrames * targetRate / srcRate`, rounded down.
     *
     * @param src Interleaved int16 samples (channels interleaved per frame)
     * @param srcFrames Number of frames in [src] (total samples = srcFrames * channels)
     * @param srcRate Source sample rate in Hz
     * @param targetRate Target sample rate in Hz
     * @param channels Number of interleaved channels
     * @return Resampled interleaved int16 samples. Returns [src] unchanged (same
     *   array instance) when the rates match or when [srcFrames] / [channels] is
     *   not positive. Returns an empty result when the rates differ and either
     *   rate is not positive, or when the computed output frame count is zero.
     */
    fun resample(
        src: ShortArray,
        srcFrames: Int,
        srcRate: Int,
        targetRate: Int,
        channels: Int
    ): ResampleResult {
        if (srcRate == targetRate || srcFrames <= 0 || channels <= 0) {
            return ResampleResult(src, srcFrames)
        }

        // A non-positive rate cannot be resampled; srcRate == 0 would otherwise
        // divide by zero below.
        if (srcRate <= 0 || targetRate <= 0) {
            return ResampleResult(ShortArray(0), 0)
        }

        val outFrames = ((srcFrames.toLong() * targetRate) / srcRate).toInt()
        if (outFrames <= 0) {
            return ResampleResult(ShortArray(0), 0)
        }

        val resampled = if (targetRate > srcRate) {
            upsample(src, srcFrames, outFrames, channels)
        } else {
            downsample(src, srcFrames, outFrames, srcRate, targetRate, channels)
        }

        return ResampleResult(resampled, outFrames)
    }

    /**
     * Linear interpolation upsampling.
     *
     * Output frames that fall after the last source frame repeat that last
     * frame rather than extrapolating beyond it.
     */
    private fun upsample(
        src: ShortArray,
        srcFrames: Int,
        outFrames: Int,
        channels: Int
    ): ShortArray {
        val out = ShortArray(outFrames * channels)

        // Edge case: single source frame — just repeat it
        if (srcFrames <= 1) {
            for (f in 0 until outFrames) {
                for (ch in 0 until channels) {
                    out[f * channels + ch] = src[ch]
                }
            }
            return out
        }

        val ratio = srcFrames.toDouble() / outFrames.toDouble()

        for (f in 0 until outFrames) {
            val srcPos = f * ratio
            // idx is capped so that idx + 1 is always a valid source frame.
            val idx = srcPos.toInt().coerceAtMost(srcFrames - 2)
            // Because idx is capped, srcPos - idx can exceed 1 near the tail;
            // clamp it so the result stays between the two source samples.
            val frac = (srcPos - idx).coerceIn(0.0, 1.0).toFloat()

            for (ch in 0 until channels) {
                val s0 = src[idx * channels + ch]
                val s1 = src[(idx + 1) * channels + ch]
                out[f * channels + ch] = (s0 + frac * (s1 - s0)).toInt()
                    .coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt())
                    .toShort()
            }
        }

        return out
    }

    /**
     * Box filter downsampling. Averages all source samples that map to each
     * output sample, acting as a simple low-pass filter.
     */
    private fun downsample(
        src: ShortArray,
        srcFrames: Int,
        outFrames: Int,
        srcRate: Int,
        targetRate: Int,
        channels: Int
    ): ShortArray {
        val out = ShortArray(outFrames * channels)
        val ratio = srcRate.toDouble() / targetRate.toDouble()

        for (f in 0 until outFrames) {
            // Half-open window [srcStart, srcEnd) of source frames for output frame f.
            val srcStart = (f * ratio).toInt()
            val srcEnd = ((f + 1) * ratio).toInt().coerceAtMost(srcFrames)
            val count = srcEnd - srcStart

            for (ch in 0 until channels) {
                var sum = 0L
                for (i in srcStart until srcEnd) {
                    sum += src[i * channels + ch]
                }
                out[f * channels + ch] = if (count > 0) {
                    (sum / count).toInt()
                        .coerceIn(Short.MIN_VALUE.toInt(), Short.MAX_VALUE.toInt())
                        .toShort()
                } else {
                    // Empty window: emit silence.
                    0.toShort()
                }
            }
        }

        return out
    }

    /**
     * Result of a [resample] call.
     *
     * Equality and hashing compare [samples] by content (the generated
     * data-class implementations would compare the array by reference).
     *
     * @property samples Interleaved int16 samples
     * @property frameCount Number of frames represented by [samples]
     */
    data class ResampleResult(val samples: ShortArray, val frameCount: Int) {
        override fun equals(other: Any?): Boolean =
            this === other ||
                (other is ResampleResult &&
                    frameCount == other.frameCount &&
                    samples.contentEquals(other.samples))

        override fun hashCode(): Int = 31 * samples.contentHashCode() + frameCount
    }
}
Loading