Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions app/src/main/java/com/kernel/ai/navigation/KernelNavHost.kt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ import com.kernel.ai.feature.settings.ScheduledAlarmsScreen
import com.kernel.ai.feature.settings.SettingsScreen
import com.kernel.ai.feature.settings.SidePanelScreen
import com.kernel.ai.feature.settings.UserProfileScreen
import com.kernel.ai.feature.settings.VoiceScreen
import kotlinx.coroutines.launch

private const val ROUTE_LIST = "conversation_list"
Expand All @@ -61,6 +62,7 @@ private const val ROUTE_CHAT = "chat"
private const val ROUTE_SETTINGS = "settings"
private const val ROUTE_USER_PROFILE = "settings/user_profile"
private const val ROUTE_MEMORY = "settings/memory"
private const val ROUTE_VOICE = "settings/voice"
private const val ROUTE_MODEL_SETTINGS = "settings/model_settings"
private const val ROUTE_MODEL_MANAGEMENT = "settings/model_management?scrollTo={scrollTo}"
private const val ARG_SCROLL_TO = "scrollTo"
Expand Down Expand Up @@ -357,6 +359,9 @@ fun KernelNavHost(
onNavigateToMemory = {
navController.navigate(ROUTE_MEMORY)
},
onNavigateToVoice = {
navController.navigate(ROUTE_VOICE)
},
onNavigateToModelSettings = {
navController.navigate(ROUTE_MODEL_SETTINGS)
},
Expand All @@ -382,6 +387,12 @@ fun KernelNavHost(
)
}

composable(ROUTE_VOICE) {
VoiceScreen(
onBack = { navController.popBackStack() },
)
}

composable(ROUTE_MODEL_SETTINGS) {
ModelSettingsScreen(
onBack = { navController.popBackStack() },
Expand Down
1 change: 1 addition & 0 deletions core/voice/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ android {
dependencies {
implementation(libs.core.ktx)
implementation(libs.coroutines.android)
implementation(libs.datastore.preferences)
implementation(libs.vosk.android)

implementation(libs.hilt.android)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package com.kernel.ai.core.voice

import android.media.AudioAttributes
import android.media.AudioFocusRequest
import android.media.AudioManager
import android.content.Context
import android.os.Bundle
import android.speech.tts.TextToSpeech
Expand Down Expand Up @@ -29,6 +32,9 @@ class AndroidTextToSpeechController @Inject constructor(
private val scope = CoroutineScope(SupervisorJob() + Dispatchers.Main.immediate)
private val _events = MutableSharedFlow<VoiceOutputEvent>(extraBufferCapacity = 8)
override val events: Flow<VoiceOutputEvent> = _events.asSharedFlow()
private val audioManager by lazy {
context.getSystemService(Context.AUDIO_SERVICE) as AudioManager
}

@Volatile
private var textToSpeech: TextToSpeech? = null
Expand All @@ -42,6 +48,17 @@ class AndroidTextToSpeechController @Inject constructor(
@Volatile
private var activeUtteranceText: String? = null

@Volatile
private var audioFocusRequest: AudioFocusRequest? = null

/**
 * Pre-initializes the TextToSpeech engine so the first real speak() call does not
 * pay the engine start-up latency.
 *
 * Returns [VoiceOutputResult.Spoken] when the engine initialized successfully
 * (no audio is actually produced by warm-up), or [VoiceOutputResult.Unavailable]
 * when a TTS engine cannot be created on this device.
 */
override suspend fun warmUp(): VoiceOutputResult {
return if (ensureReady() != null) {
VoiceOutputResult.Spoken
} else {
VoiceOutputResult.Unavailable("Text-to-speech is unavailable on this device.")
}
}

override suspend fun speak(request: VoiceSpeakRequest): VoiceOutputResult {
val engine = ensureReady() ?: return VoiceOutputResult.Unavailable(
"Text-to-speech is unavailable on this device."
Expand All @@ -56,6 +73,7 @@ class AndroidTextToSpeechController @Inject constructor(
}

engine.language = locale
requestAudioFocus()
val utteranceId = request.utteranceId ?: "kernel-voice-${System.nanoTime()}"
activeUtteranceId = utteranceId
activeUtteranceText = request.text
Expand All @@ -68,6 +86,7 @@ class AndroidTextToSpeechController @Inject constructor(
return if (result == TextToSpeech.ERROR) {
activeUtteranceId = null
activeUtteranceText = null
releaseAudioFocus()
VoiceOutputResult.Unavailable("Text-to-speech failed to start.")
} else {
VoiceOutputResult.Spoken
Expand All @@ -80,6 +99,7 @@ class AndroidTextToSpeechController @Inject constructor(
textToSpeech?.stop()
activeUtteranceId = null
activeUtteranceText = null
releaseAudioFocus()
if (hadActiveUtterance) {
_events.emit(VoiceOutputEvent.SpeakingStopped)
}
Expand All @@ -100,6 +120,12 @@ class AndroidTextToSpeechController @Inject constructor(
val engine = TextToSpeech(context) { status ->
if (!deferred.isCompleted) deferred.complete(status)
}
engine.setAudioAttributes(
AudioAttributes.Builder()
.setUsage(AudioAttributes.USAGE_ASSISTANT)
.setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
.build()
)
engine.setOnUtteranceProgressListener(
object : UtteranceProgressListener() {
override fun onStart(utteranceId: String?) {
Expand Down Expand Up @@ -149,6 +175,31 @@ class AndroidTextToSpeechController @Inject constructor(
if (utteranceId != activeUtteranceId) return
activeUtteranceId = null
activeUtteranceText = null
releaseAudioFocus()
_events.tryEmit(VoiceOutputEvent.SpeakingStopped)
}

/**
 * Requests transient, may-duck audio focus for an assistant speech utterance.
 *
 * Any focus request still held from a previous utterance is abandoned first:
 * without that, back-to-back speak() calls overwrite [audioFocusRequest] and the
 * earlier granted request is never abandoned, leaking a stacked focus claim in
 * AudioManager. A denied request is logged but not treated as fatal — speech
 * proceeds regardless.
 */
private fun requestAudioFocus() {
    // Release whatever we currently hold before asking again, so exactly one
    // request is outstanding at a time.
    releaseAudioFocus()
    val request = AudioFocusRequest.Builder(AudioManager.AUDIOFOCUS_GAIN_TRANSIENT_MAY_DUCK)
        .setAudioAttributes(
            AudioAttributes.Builder()
                .setUsage(AudioAttributes.USAGE_ASSISTANT)
                .setContentType(AudioAttributes.CONTENT_TYPE_SPEECH)
                .build()
        )
        .setAcceptsDelayedFocusGain(false)
        .setWillPauseWhenDucked(false)
        // No-op listener: TTS playback is short-lived, so focus-change events
        // are intentionally ignored rather than pausing/resuming speech.
        .setOnAudioFocusChangeListener { }
        .build()
    audioFocusRequest = request
    val result = audioManager.requestAudioFocus(request)
    if (result != AudioManager.AUDIOFOCUS_REQUEST_GRANTED) {
        Log.w(TAG, "TextToSpeech audio focus request not granted: $result")
    }
}

/**
 * Abandons the currently held audio focus request, if any, and clears the
 * stored reference. Safe to call when no focus is held.
 */
private fun releaseAudioFocus() {
    val held = audioFocusRequest ?: return
    audioManager.abandonAudioFocusRequest(held)
    audioFocusRequest = null
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ sealed interface VoiceOutputEvent {
interface VoiceOutputController {
val events: Flow<VoiceOutputEvent> get() = emptyFlow()

suspend fun warmUp(): VoiceOutputResult = VoiceOutputResult.Spoken

suspend fun speak(request: VoiceSpeakRequest): VoiceOutputResult

fun stop()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package com.kernel.ai.core.voice

import android.content.Context
import android.util.Log
import androidx.datastore.preferences.core.booleanPreferencesKey
import androidx.datastore.preferences.core.edit
import androidx.datastore.preferences.core.emptyPreferences
import androidx.datastore.preferences.preferencesDataStore
import dagger.hilt.android.qualifiers.ApplicationContext
import java.io.IOException
import javax.inject.Inject
import javax.inject.Singleton
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.flow.map

private const val TAG = "VoiceOutputPrefs"
private val Context.voiceOutputPrefsDataStore by preferencesDataStore(name = "voice_output_preferences")

/**
 * Process-wide store for the user's spoken-response (TTS) preference, backed by
 * Jetpack DataStore ("voice_output_preferences").
 *
 * Shared by the Quick Actions / QIR voice path and the Settings -> Voice screen
 * so both observe and mutate the same toggle.
 */
@Singleton
class VoiceOutputPreferences @Inject constructor(
@ApplicationContext private val context: Context,
) {
// DataStore key backing the "speak Quick Action responses aloud" toggle.
private val spokenResponsesEnabledKey =
booleanPreferencesKey("quick_actions_spoken_responses_enabled")

// Emits the current toggle value; defaults to true when no value is stored.
// IO failures while reading are logged and treated as "no stored preferences"
// (defaults apply); any other error is rethrown to the collector.
val spokenResponsesEnabled: Flow<Boolean> = context.voiceOutputPrefsDataStore.data
.catch { e ->
if (e is IOException) {
Log.e(TAG, "Failed reading voice output preferences; using defaults", e)
emit(emptyPreferences())
} else {
throw e
}
}
.map { prefs -> prefs[spokenResponsesEnabledKey] ?: true }

// Persists the toggle; suspends until the DataStore write completes.
suspend fun setSpokenResponsesEnabled(enabled: Boolean) {
context.voiceOutputPrefsDataStore.edit { prefs ->
prefs[spokenResponsesEnabledKey] = enabled
}
}
}
4 changes: 2 additions & 2 deletions docs/ROADMAP.md
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,8 @@ Lower-priority skill additions — third-party integrations and local utilities.

| Sub-Issue | Title | Status | Priority |
|-----------|-------|--------|----------|
| [#671](https://github.com/NickMonrad/kernel-ai-assistant/issues/671) | Offline push-to-talk voice input foundation | ⬜ Pending | 🟡 Medium |
| [#672](https://github.com/NickMonrad/kernel-ai-assistant/issues/672) | Generic spoken response / TTS foundation | ⬜ Pending | 🟡 Medium |
| [#671](https://github.com/NickMonrad/kernel-ai-assistant/issues/671) | Offline push-to-talk voice input foundation | ✅ Done — PR #711 | 🟡 Medium |
| [#672](https://github.com/NickMonrad/kernel-ai-assistant/issues/672) | Generic spoken response / TTS foundation | ✅ Done — PR #711 | 🟡 Medium |
| [#678](https://github.com/NickMonrad/kernel-ai-assistant/issues/678) | Optional native Android STT engine alongside Vosk | ⬜ Pending | 🟡 Medium |
| [#700](https://github.com/NickMonrad/kernel-ai-assistant/issues/700) | Parakeet CTC STT evaluation | ⬜ Pending | 🟡 Medium |
| [#703](https://github.com/NickMonrad/kernel-ai-assistant/issues/703) | Whisper.cpp vs Vosk + staged vision follow-up | ⬜ Pending | 🟡 Medium |
Expand Down
8 changes: 4 additions & 4 deletions docs/SPECIFICATION.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Technical Specification: Jandal AI — Local-First Android AI Assistant

> **Last updated:** 2026-04-22 (docs refresh for #523 / PR #682; prompt/tool-routing docs updated)
> **Last updated:** 2026-05-02 (voice foundations and roadmap summary refresh)
>
> This is the authoritative technical specification for Jandal AI. For feature status and
> delivery timeline, see [`ROADMAP.md`](./ROADMAP.md).
Expand Down Expand Up @@ -575,7 +575,7 @@ Community-extensible skills run sandboxed via **Chicory** (pure JVM Wasm runtime
- **Navigation:** Bottom nav bar — Chats tab (conversations list) + Actions tab (quick commands)
- **Chat:** Streaming token display, thinking mode indicator, markdown rendering, multi-conversation
- **Actions tab:** History list, FAB (⚡) for new commands, bottom sheet input, Room-persisted history
- **Voice:** Tap-to-toggle with auto-stop on silence (future: "Hey Jandal" wake word)
- **Voice:** Quick Actions push-to-talk with offline STT and spoken QIR responses; chat voice and wake word remain future work
- **Skill results:** Inline rich cards in the conversation stream, with expandable list previews and link surfacing for fallback/plain-text results
- **Persona:** Friendly, concise, dry-humoured Kiwi — see §7 for full identity details

Expand Down Expand Up @@ -805,9 +805,9 @@ for the larger planned coverage matrix.
|-------|-------------|--------|
| 1 | Core LiteRT-LM chat + GPU/NPU + GPU alignment fixes + OOM protection | ✅ Complete |
| 2 | sqlite-vec RAG + EmbeddingGemma + episodic distillation + memory UI | ✅ Complete |
| 3 | Resident Agent Architecture: QIR + native SDK tool calling, rich tool results, weather/list/date/media skills, and broader multi-turn support | 🔄 In Progress |
| 3 | Resident Agent Architecture: QIR + native SDK tool calling, rich tool results, voice foundations, weather/list/date/media skills, and broader multi-turn support | 🔄 In Progress |
| 4 | Dreaming Engine (overnight distillation) + Semantic Cache + Self-Healing Identity | ⬜ Planned |
| 5 | Chicory Wasm Runtime + GitHub Skill Store | ⬜ Planned |
| 6 | 8GB device optimisation (dynamic weight loading, E2B auto-select) + wake word | ⬜ Planned |
| 6 | 8GB device optimisation (dynamic weight loading, E2B auto-select) | ⬜ Planned |

See [`ROADMAP.md`](./ROADMAP.md) for full task-level detail.
14 changes: 14 additions & 0 deletions docs/phase-3f-voice-foundations-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@ Baseline target:
- transcript flows through the same routing path as typed input
- empty/silence/error states are visible and recoverable

Current branch status:

- Quick Actions already ships with a push-to-talk mic entry point, microphone permission flow, and offline Vosk-backed transcription
- transcripts route back through the existing `ActionsViewModel` Quick Actions path
- the current manual device pass was sufficient to treat `#671` as complete on merge alongside `#672`

#### 4. Implement spoken responses

Primary issue: `#672`
Expand All @@ -81,6 +87,14 @@ Baseline target:
- slot-fill prompts can be spoken
- interruption / stop behavior is predictable
- typed Quick Actions remain silent by default
- spoken-response controls live under **Settings -> Voice** so future STT/TTS options can expand in one place

Current branch status:

- `AndroidTextToSpeechController` has warm-up support and explicit assistant-style audio attributes / transient audio focus handling
- Quick Actions / QIR spoken responses are controlled by a shared `VoiceOutputPreferences` preference
- the user-facing spoken-response toggle has been moved out of About into a dedicated **Settings -> Voice** screen
- the new Voice screen is the intended future home for additional STT/TTS settings, engine choices, and voice model controls

#### 5. Connect the voice session loop

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import com.kernel.ai.core.voice.VoiceInputEvent
import com.kernel.ai.core.voice.VoiceInputStartResult
import com.kernel.ai.core.voice.VoiceOutputController
import com.kernel.ai.core.voice.VoiceOutputEvent
import com.kernel.ai.core.voice.VoiceOutputPreferences
import com.kernel.ai.core.voice.VoiceOutputResult
import com.kernel.ai.core.voice.VoiceSpeakRequest
import dagger.hilt.android.lifecycle.HiltViewModel
Expand Down Expand Up @@ -57,6 +58,7 @@ class ActionsViewModel @Inject constructor(
private val quickActionDao: QuickActionDao,
private val voiceInputController: VoiceInputController,
private val voiceOutputController: VoiceOutputController,
private val voiceOutputPreferences: VoiceOutputPreferences,
) : ViewModel() {

// ── Action history ──────────────────────────────────────────────────────
Expand Down Expand Up @@ -129,8 +131,19 @@ class ActionsViewModel @Inject constructor(
val voicePlaybackState: StateFlow<VoicePlaybackState> = _voicePlaybackState.asStateFlow()
private var shouldAutoStartVoiceSlotReply = false
private var pendingPhonePermissionAction: PendingPhonePermissionAction? = null
private var spokenResponsesEnabled = true

init {
viewModelScope.launch {
voiceOutputPreferences.spokenResponsesEnabled.collect { enabled ->
spokenResponsesEnabled = enabled
if (enabled) {
voiceOutputController.warmUp()
} else {
voiceOutputController.stop()
}
}
}
viewModelScope.launch {
voiceInputController.events.collect { event ->
when (event) {
Expand Down Expand Up @@ -558,6 +571,7 @@ class ActionsViewModel @Inject constructor(

private fun speakForVoice(inputMode: InputMode, text: String) {
if (inputMode != InputMode.Voice) return
if (!spokenResponsesEnabled) return
val summary = toSpokenSummary(text)
if (summary.isBlank()) return
viewModelScope.launch {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import com.kernel.ai.core.voice.VoiceInputEvent
import com.kernel.ai.core.voice.VoiceInputStartResult
import com.kernel.ai.core.voice.VoiceOutputController
import com.kernel.ai.core.voice.VoiceOutputEvent
import com.kernel.ai.core.voice.VoiceOutputPreferences
import com.kernel.ai.core.voice.VoiceOutputResult
import com.kernel.ai.core.voice.VoiceSpeakRequest
import io.mockk.Runs
Expand All @@ -28,6 +29,7 @@ import io.mockk.unmockkStatic
import io.mockk.verify
import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.ExperimentalCoroutinesApi
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.MutableSharedFlow
import kotlinx.coroutines.flow.emptyFlow
import kotlinx.coroutines.flow.flowOf
Expand Down Expand Up @@ -55,8 +57,10 @@ class ActionsViewModelVoiceTest {
private val quickActionDao: QuickActionDao = mockk()
private val voiceInputController: VoiceInputController = mockk()
private val voiceOutputController: VoiceOutputController = mockk()
private val voiceOutputPreferences: VoiceOutputPreferences = mockk()
private val voiceInputEvents = MutableSharedFlow<VoiceInputEvent>()
private val voiceOutputEvents = MutableSharedFlow<VoiceOutputEvent>()
private val spokenResponsesEnabled = MutableStateFlow(true)

private lateinit var viewModel: ActionsViewModel

Expand All @@ -71,15 +75,18 @@ class ActionsViewModelVoiceTest {
coEvery { quickActionDao.insert(any()) } just Runs
every { voiceInputController.events } returns voiceInputEvents
every { voiceInputController.stopListening() } just Runs
coEvery { voiceOutputController.warmUp() } returns VoiceOutputResult.Spoken
coEvery { voiceOutputController.speak(any()) } returns VoiceOutputResult.Spoken
every { voiceOutputController.events } returns voiceOutputEvents
every { voiceOutputController.stop() } just Runs
every { voiceOutputPreferences.spokenResponsesEnabled } returns spokenResponsesEnabled
viewModel = ActionsViewModel(
quickIntentRouter = quickIntentRouter,
skillRegistry = skillRegistry,
quickActionDao = quickActionDao,
voiceInputController = voiceInputController,
voiceOutputController = voiceOutputController,
voiceOutputPreferences = voiceOutputPreferences,
)
}

Expand Down Expand Up @@ -259,6 +266,29 @@ class ActionsViewModelVoiceTest {
}
}

@Test
fun `voice mode does not speak when spoken responses disabled`() = runTest(dispatcher) {
// Arrange: disable the spoken-responses preference, then wire the router and
// registry so "turn on flashlight" resolves to a successful direct skill.
val directSkill = mockk<Skill>()
spokenResponsesEnabled.value = false
every { quickIntentRouter.route("turn on flashlight") } returns
QuickIntentRouter.RouteResult.RegexMatch(
QuickIntentRouter.MatchedIntent(
intentName = "toggle_flashlight_on",
params = emptyMap(),
),
)
every { skillRegistry.get("toggle_flashlight_on") } returns directSkill
every { directSkill.name } returns "toggle_flashlight_on"
every { directSkill.description } returns "Toggle flashlight"
every { directSkill.schema } returns SkillSchema()
coEvery { directSkill.execute(any()) } returns SkillResult.Success("Flashlight on")

// Act: run the action in Voice mode, which would normally trigger TTS.
viewModel.executeAction("turn on flashlight", InputMode.Voice)
advanceUntilIdle()

// Assert: the preference suppresses speech even for voice-initiated input.
coVerify(exactly = 0) { voiceOutputController.speak(any()) }
}

@Test
fun `make call permission flow emits event and retries after grant`() = runTest(dispatcher) {
val runIntentSkill = mockk<Skill>()
Expand Down
Loading
Loading