WhisperLib is a powerful Android library built on TensorFlow Lite that enables you to easily integrate speech recognition capabilities into your Android applications with high performance and accuracy.
- 🎤 Real-time audio recording with high quality
- 🧠 Speech recognition using Whisper model
- 🌍 Multilingual support (English, Vietnamese, Chinese, etc.)
- ⚡ High performance with native C++ engine
- 🔧 Simple API and easy to use
- 📱 Compatible with Android API 29+
- Android API Level: 29+ (Android 10+)
- Architecture: ARM64-v8a, ARMv7
- RAM: Minimum 2GB
- Storage: ~50MB for model and library
- Download `whisper-lib-release.aar` from Releases
- Copy the file to your project's `libs` directory: `app/libs/whisper-lib-release.aar`
Add to your app/build.gradle.kts:
android {
// ... existing configuration
packagingOptions {
pickFirst '**/libc++_shared.so'
pickFirst '**/libjsc.so'
}
}
dependencies {
// WhisperLib AAR
implementation(files("libs/whisper-lib-release.aar"))
// Or use fileTree
implementation(fileTree(mapOf("dir" to "libs", "include" to listOf("*.aar"))))
// Required dependencies
implementation("androidx.appcompat:appcompat:1.6.1")
implementation("org.tensorflow:tensorflow-lite:2.14.0")
implementation("org.tensorflow:tensorflow-lite-support:0.4.4")
}

Add to your AndroidManifest.xml:
<uses-permission android:name="android.permission.RECORD_AUDIO" />
<uses-permission android:name="android.permission.WRITE_EXTERNAL_STORAGE" />
<uses-permission android:name="android.permission.READ_EXTERNAL_STORAGE" />

class MainActivity : AppCompatActivity() {
private val REQUEST_RECORD_AUDIO = 1001
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
// Request audio recording permission
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
!= PackageManager.PERMISSION_GRANTED) {
ActivityCompat.requestPermissions(this,
arrayOf(Manifest.permission.RECORD_AUDIO), REQUEST_RECORD_AUDIO)
}
}
override fun onRequestPermissionsResult(
requestCode: Int,
permissions: Array<String>,
grantResults: IntArray
) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
if (requestCode == REQUEST_RECORD_AUDIO) {
if (grantResults.isNotEmpty() && grantResults[0] == PackageManager.PERMISSION_GRANTED) {
// Permission granted, can use WhisperLib
}
}
}
}

import com.hadtun.whisperlib.WhisperLib

class MainActivity : AppCompatActivity() {
private lateinit var whisperLib: WhisperLib
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
// Initialize WhisperLib
whisperLib = WhisperLib.init(this)
// Load default model
val success = whisperLib.loadDefaultModel()
if (success) {
Log.d("WhisperLib", "Model loaded successfully")
}
}
}

class VoiceRecognitionActivity : AppCompatActivity() {
private lateinit var whisperLib: WhisperLib
private fun startRecording() {
val success = whisperLib.startRecording()
if (success) {
Log.d("VoiceRecognition", "Recording started")
// Update UI to show recording state
}
}
private fun stopRecording() {
val success = whisperLib.stopRecording()
if (success) {
Log.d("VoiceRecognition", "Recording stopped")
// Get recognition result
val transcription = whisperLib.getTranscription()
Log.d("VoiceRecognition", "Transcription: $transcription")
// Display result on UI
updateUI(transcription)
}
}
private fun updateUI(text: String) {
runOnUiThread {
// Update TextView with result
textViewResult.text = text
}
}
}

class ChatActivity : AppCompatActivity() {
private lateinit var whisperLib: WhisperLib
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
setContentView(R.layout.activity_chat)
whisperLib = WhisperLib.init(this)
whisperLib.loadDefaultModel()
setupChatButton()
}
private fun setupChatButton() {
val chatButton = findViewById<Button>(R.id.btnChat)
chatButton.setOnTouchListener { _, event ->
when (event.action) {
MotionEvent.ACTION_DOWN -> {
// Start recording when press and hold
whisperLib.startRecording()
chatButton.setBackgroundColor(Color.RED)
true
}
MotionEvent.ACTION_UP, MotionEvent.ACTION_CANCEL -> {
// Stop recording and recognize when release button
whisperLib.stopRecording()
chatButton.setBackgroundColor(Color.BLUE)
// Get result and display
val result = whisperLib.getTranscription()
displayResult(result)
true
}
else -> false
}
}
}
private fun displayResult(text: String) {
runOnUiThread {
textViewResult.text = text
}
}
}

class AudioFileActivity : AppCompatActivity() {
private lateinit var whisperLib: WhisperLib
private fun transcribeAudioFile(filePath: String) {
// Transcribe from existing audio file
val result = whisperLib.transcribeFile(filePath)
if (result.isNotEmpty()) {
Log.d("AudioFile", "Transcription: $result")
// Process result
} else {
Log.e("AudioFile", "Transcription failed")
}
}
}

class StatusActivity : AppCompatActivity() {
private lateinit var whisperLib: WhisperLib
private fun checkStatus() {
// Check if currently recording
val isRecording = whisperLib.isRecording()
// Get recorded audio file path
val audioPath = whisperLib.getRecordedAudioPath()
Log.d("Status", "Is recording: $isRecording")
Log.d("Status", "Audio path: $audioPath")
}
override fun onDestroy() {
super.onDestroy()
// Release resources
whisperLib.release()
}
}

| Method | Description | Return Type |
|---|---|---|
| `init(context)` | Initialize WhisperLib | WhisperLib |
| `loadDefaultModel()` | Load default model | Boolean |
| `loadModel(path, isMultilingual)` | Load custom model | Boolean |
| `startRecording()` | Start recording | Boolean |
| `stopRecording()` | Stop recording | Boolean |
| `getTranscription()` | Get recognition result | String |
| `transcribeFile(filePath)` | Transcribe from file | String |
| `isRecording()` | Check recording status | Boolean |
| `getRecordedAudioPath()` | Get audio file path | String? |
| `release()` | Release resources | Unit |
class CompleteExample : AppCompatActivity() {
private lateinit var whisperLib: WhisperLib
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
// 1. Initialize
whisperLib = WhisperLib.init(this)
// 2. Load model
val modelLoaded = whisperLib.loadDefaultModel()
if (!modelLoaded) {
Log.e("WhisperLib", "Failed to load model")
return
}
// 3. Setup UI
setupUI()
}
private fun setupUI() {
val recordButton = findViewById<Button>(R.id.btnRecord)
val resultText = findViewById<TextView>(R.id.tvResult)
recordButton.setOnClickListener {
if (whisperLib.isRecording()) {
// Stop recording
whisperLib.stopRecording()
val result = whisperLib.getTranscription()
resultText.text = result
recordButton.text = "Start Recording"
} else {
// Start recording
whisperLib.startRecording()
recordButton.text = "Stop Recording"
}
}
}
override fun onDestroy() {
super.onDestroy()
whisperLib.release()
}
}

// Record and convert to text note
whisperLib.startRecording()
// ... after user finishes speaking
whisperLib.stopRecording()
val note = whisperLib.getTranscription()
saveNoteToDatabase(note)

// Search using voice
whisperLib.startRecording()
// ... user speaks search keywords
whisperLib.stopRecording()
val searchQuery = whisperLib.getTranscription()
performSearch(searchQuery)

// Control app using voice
whisperLib.startRecording()
// ... user speaks command
whisperLib.stopRecording()
val command = whisperLib.getTranscription()
executeVoiceCommand(command)

// Convert speech to text in real-time
whisperLib.startRecording()
// Can use timer for periodic recognition
val timer = Timer()
timer.scheduleAtFixedRate(object : TimerTask() {
override fun run() {
if (whisperLib.isRecording()) {
val partialResult = whisperLib.getTranscription()
updateRealtimeUI(partialResult)
}
}
}, 0, 2000) // Every 2 seconds

- First model load: May take 2-5 seconds
- Recognition time: 1-3 seconds depending on audio length
- RAM usage: ~100-200MB when active
- Battery: Consumes battery when recording continuously
- Always check permissions before using
- Release resources in `onDestroy()`
- Handle errors when loading model or recording
- Test on real device to ensure audio quality
- Use background thread for heavy tasks
1. "Permission denied"
// Solution: Check and request permission
if (ContextCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO)
!= PackageManager.PERMISSION_GRANTED) {
ActivityCompat.requestPermissions(this,
arrayOf(Manifest.permission.RECORD_AUDIO), REQUEST_CODE)
}

2. "Model not loaded"
// Solution: Check model path and file exists
val success = whisperLib.loadDefaultModel()
if (!success) {
Log.e("WhisperLib", "Check if model files exist in assets")
}

3. "No audio input"
// Solution: Check microphone and audio settings
if (!whisperLib.isRecording()) {
Log.e("WhisperLib", "Check microphone permissions and hardware")
}

You can create custom Whisper models for specific languages and tasks:
pip install tensorflow==2.14.0
pip install transformers
pip install datasets

Open models_and_scripts/whisper_tflite_model_generation_and_test.ipynb in Jupyter Notebook:
# Configure model as per requirement
model_name = "whisper-base" # whisper-tiny, whisper-tiny.en, whisper-base, whisper-base.en, whisper-small, whisper-small.en
# Configure language and task
language_code = "<|en|>" # <|en|>, <|fr|>, <|hi|>, <|ko|>, <|de|>, <|zh|>, <|ja|>, <|es|>, <|ar|>, <|ru|>, ...
task_code = "<|transcribe|>" # <|transcribe|>, <|translate|>

| Model | Size | Multilingual | English Only |
|---|---|---|---|
| whisper-tiny | ~39 MB | ✅ | ✅ (.en) |
| whisper-base | ~74 MB | ✅ | ✅ (.en) |
| whisper-small | ~244 MB | ✅ | ✅ (.en) |
| whisper-medium | ~769 MB | ✅ | ✅ (.en) |
| whisper-large | ~1550 MB | ✅ | ❌ |
// Load custom model
val customModelPath = "/path/to/your/custom_model.tflite"
val isMultilingual = true // or false for English-only
val success = whisperLib.loadModel(customModelPath, isMultilingual)

- Copy `.tflite` file to `app/src/main/assets/`
- Copy vocabulary `.bin` file to `app/src/main/assets/`
- Rebuild project
- Use whisper-tiny for mobile apps (lightest)
- Use whisper-base for balance between accuracy and size
- Use .en models if you only need English recognition
- Quantization can reduce model size by 50%
See detailed examples in the demo/ folder with features:
- Basic voice recording
- Press-and-hold chat button
- File audio transcription
- Real-time voice commands
We welcome contributions! Please:
- Fork the repository
- Create your feature branch (`git checkout -b feature/AmazingFeature`)
- Commit your changes (`git commit -m 'Add some AmazingFeature'`)
- Push to the branch (`git push origin feature/AmazingFeature`)
- Open a Pull Request
Distributed under the MIT License. See LICENSE for more information.
- GitHub Issues: Create an issue
- OpenAI Whisper - Speech recognition model
- TensorFlow Lite - Machine learning framework
- Android AudioRecord - Android audio recording API
⭐ If this library is helpful, please give us a star! ⭐