From 43350830ba353cd49b4ce291fa17ed9f4d0ee925 Mon Sep 17 00:00:00 2001 From: cozy Date: Sat, 18 Apr 2026 14:36:05 -0400 Subject: [PATCH] fix(auth): add client-side deadline + reconnect recovery for in-TUI auth MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #60 left the in-TUI Spotify auth flow vulnerable to two stuck-state bugs: if the daemon crashed mid-flow the `auth_in_progress` flag survived indefinitely, and a control-socket reconnect would leave the client convinced a flow was still running against a daemon that had no memory of it. Fix both: - `PickerState::tick_auth_deadline` trips after 5m30s — longer than the daemon's own 5m ceiling so the daemon's `AuthFailed { "timeout" }` normally wins with its specific reason, and this is the last-resort unstick when the daemon is simply gone. Called every render tick. - `ReconnectingSession` fires a reconnect notice via a sender the TUI installs on startup. The render loop drains the channel each tick, calls `handle_daemon_reconnected` (which clears any pending auth flag with a benign "daemon reconnected — retry if needed" banner), and re-issues `Verb::ReadConfig` so the Settings tab resyncs with the new daemon's actual auth state. Fix 1 from the original scope — capturing the OAuth authorize URL so the TUI can render it for SSH/headless users — turned out to be structurally impossible from outside librespot-oauth. Both the CSRF state and the PKCE challenge are generated inside `OAuthClient::set_auth_url` with no public hook. Any URL we'd build ourselves would pair a different state + challenge with the verifier librespot's listener is waiting to exchange — the user's browser redirect would then fail PKCE validation. The task's stop-and-report clause applies. Added a TODO in auth.rs pointing at the upstream fix (expose the URL through the public API) and softened the pending-row message to point headless users at the sibling `clitunes auth` CLI, which runs in their terminal and can print the URL directly. Tests: - tick_auth_deadline: noop when idle, noop before timeout, trips after timeout with a client-side reason, and doesn't double-fail. - handle_daemon_reconnected: clears pending flag + surfaces benign note; noop when not in progress. - set_auth_completed/failed clear the deadline clock. - `a` press populates `auth_started_at`. - paint: pending state renders the new "run clitunes auth" fallback. Local-CI triple clean: `cargo fmt --check`, `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test --workspace --all-features`. Closes clitunes-lsn / CLI-93 --- .../src/sources/spotify/auth.rs | 10 + .../clitunes-engine/src/tui/picker/paint.rs | 36 ++++ .../clitunes-engine/src/tui/picker/state.rs | 174 +++++++++++++++++- crates/clitunes/src/client/reconnect.rs | 18 ++ crates/clitunes/src/client/render_loop.rs | 36 ++++ crates/clitunes/src/main.rs | 7 + 6 files changed, 276 insertions(+), 5 deletions(-) diff --git a/crates/clitunes-engine/src/sources/spotify/auth.rs b/crates/clitunes-engine/src/sources/spotify/auth.rs index 4f2f97f..da36b91 100644 --- a/crates/clitunes-engine/src/sources/spotify/auth.rs +++ b/crates/clitunes-engine/src/sources/spotify/auth.rs @@ -336,6 +336,16 @@ fn print_headless_instructions() { /// Run the OAuth2 PKCE flow via librespot-oauth. Listens on /// `127.0.0.1:8898` for the redirect callback. When `open_browser` /// is true, also opens the auth URL in the default browser. +// +// TODO(librespot-oauth): upstream a public API that exposes the PKCE +// authorize URL so the daemon can emit it to the TUI client. Today +// `OAuthClient::set_auth_url` is private and generates the CSRF token +// and PKCE challenge internally, which means we can't reconstruct the +// exact URL from outside the crate — any URL we build would have a +// different `state` and `code_challenge` than the one librespot's +// listener is waiting to exchange, so the user's browser redirect +// would fail PKCE validation. See +// https://github.com/librespot-org/librespot/tree/dev/oauth fn run_oauth_flow(open_browser: bool) -> Result { let client_id = spotify_client_id(); let mut builder = diff --git a/crates/clitunes-engine/src/tui/picker/paint.rs b/crates/clitunes-engine/src/tui/picker/paint.rs index 733e520..c1d1fd2 100644 --- a/crates/clitunes-engine/src/tui/picker/paint.rs +++ b/crates/clitunes-engine/src/tui/picker/paint.rs @@ -652,6 +652,12 @@ fn paint_settings_body( // re-running `clitunes auth` would race the daemon, so only // show the progress line. if auth_in_progress { + // Two rows: primary status then a secondary hint pointing + // SSH/headless users at the sibling CLI, since the daemon + // can't surface the OAuth URL through the TUI today (see + // TODO(librespot-oauth) in sources/spotify/auth.rs). The + // sibling CLI runs in the same process as the user's + // terminal so it can print the URL directly. write_centered( grid, inner_x0, @@ -661,6 +667,19 @@ fn paint_settings_body( accent, surface, ); + let hint_row = row.saturating_add(1); + if hint_row < rows_end { + write_centered( + grid, + inner_x0, + inner_w, + hint_row, + "Headless? Cancel and run `clitunes auth` from a shell.", + body_dim_fg, + surface, + ); + row = hint_row; + } } else { let instruction = match snap.auth_status { SettingsAuthStatus::LoggedIn => "Press `a` to refresh scopes or switch accounts.", @@ -1130,6 +1149,23 @@ mod tests { ); } + #[test] + fn paint_picker_settings_tab_pending_state_shows_headless_fallback_hint() { + let mut grid = CellGrid::new(120, 40); + let list = baked_list(); + let theme = default_theme(); + let mut state = PickerState::new(&list, 0); + state.active_tab = PickerTab::Settings; + state.set_settings(sample_settings_snapshot(SettingsAuthStatus::LoggedOut)); + state.set_auth_started(); + let _rect = paint_picker(&mut grid, &list, &state, &theme).expect("rect"); + let text: String = grid.cells().iter().map(|c| c.ch).collect(); + assert!( + text.contains("clitunes auth"), + "pending state should point headless users at the sibling CLI" + ); + } + #[test] fn paint_picker_settings_tab_shows_auth_error() { let mut grid = CellGrid::new(120, 40); diff --git a/crates/clitunes-engine/src/tui/picker/state.rs b/crates/clitunes-engine/src/tui/picker/state.rs index de37fbf..dbfa040 100644 --- a/crates/clitunes-engine/src/tui/picker/state.rs +++ b/crates/clitunes-engine/src/tui/picker/state.rs @@ -42,12 +42,20 @@ //! [`PickerAction::PickSpotify`] with the URI, because the picker //! doesn't need to know what verb the caller sends. -use std::time::Instant; +use std::time::{Duration, Instant}; use clitunes_core::{BrowseItem, LibraryCategory}; use crate::tui::picker::curated_seed::CuratedList; +/// Client-side deadline for an in-flight OAuth flow, measured from the +/// moment the `a` keybind fires. Set 30 s longer than the daemon's own +/// 5-minute `AUTH_TIMEOUT` so the daemon's `AuthFailed { reason: "timeout" }` +/// normally wins the race and surfaces with its specific reason; this +/// client-side ceiling is the last-resort unstick for a daemon that +/// died mid-flow and never emits any terminal event. +pub const AUTH_CLIENT_TIMEOUT: Duration = Duration::from_secs(330); + /// Maximum time between keypresses to count as "rapid" for momentum /// (150 ms). Tuned so that holding an arrow key at typical repeat /// rates (~80–100 ms) always counts as rapid, while deliberate @@ -217,6 +225,11 @@ pub struct PickerState { /// Last `AuthFailed { reason }` from the daemon, shown inline on /// the Settings tab until the next `a` press clears it. pub auth_error: Option, + /// Wall-clock instant at which `auth_in_progress` was last set. The + /// render loop uses this with [`AUTH_CLIENT_TIMEOUT`] to unstick + /// the UI when the daemon died mid-flow and will never emit a + /// terminal `AuthCompleted` / `AuthFailed` event. + pub auth_started_at: Option, /// Banner shown in the header when the user's last station has /// gone missing. `None` on the happy path. @@ -243,6 +256,7 @@ impl PickerState { settings: None, auth_in_progress: false, auth_error: None, + auth_started_at: None, banner: None, rapid_count: 0, last_nav_at: None, @@ -294,8 +308,15 @@ impl PickerState { /// the render loop when an `AuthStarted` event arrives. Idempotent /// with the state already set by the `a` keybind. pub fn set_auth_started(&mut self) { + self.set_auth_started_at(Instant::now()); + } + + /// Internal variant that takes the wall-clock instant so tests can + /// drive the client-deadline timer deterministically. + pub fn set_auth_started_at(&mut self, now: Instant) { self.auth_in_progress = true; self.auth_error = None; + self.auth_started_at = Some(now); } /// Auth flow ended successfully — clear the pending state so the @@ -303,6 +324,7 @@ impl PickerState { pub fn set_auth_completed(&mut self) { self.auth_in_progress = false; self.auth_error = None; + self.auth_started_at = None; } /// Auth flow failed. `reason` is rendered under the status line @@ -310,6 +332,44 @@ impl PickerState { pub fn set_auth_failed(&mut self, reason: String) { self.auth_in_progress = false; self.auth_error = Some(reason); + self.auth_started_at = None; + } + + /// Called on every render-loop tick. If an auth flow has been + /// pending longer than [`AUTH_CLIENT_TIMEOUT`], flip it to a + /// failure so the user isn't stuck staring at "Opening browser…" + /// when the daemon crashed mid-flow and will never reply. + /// + /// Returns `true` when the tick tripped the deadline, so the + /// caller can log it. + pub fn tick_auth_deadline(&mut self) -> bool { + self.tick_auth_deadline_at(Instant::now()) + } + + /// Deterministic variant for tests. + pub fn tick_auth_deadline_at(&mut self, now: Instant) -> bool { + if !self.auth_in_progress { + return false; + } + let Some(started) = self.auth_started_at else { + return false; + }; + if now.saturating_duration_since(started) < AUTH_CLIENT_TIMEOUT { + return false; + } + self.set_auth_failed("no response from daemon — retry when ready".into()); + true + } + + /// Clear any in-flight auth state after a control-socket reconnect. + /// The daemon we just rejoined has a fresh atomic (it may or may + /// not be the same process), so the old pending flag is meaningless + /// — surface a benign note and let the next `ConfigSnapshot` paint + /// the true auth status. + pub fn handle_daemon_reconnected(&mut self) { + if self.auth_in_progress { + self.set_auth_failed("daemon reconnected — retry if needed".into()); + } } /// Handle a key. Public entry point used by the render loop. @@ -410,10 +470,12 @@ impl PickerState { if self.auth_in_progress || self.is_logged_in() { return PickerAction::Ignored; } - // Clear any stale failure banner so the user sees a - // clean "pending" state on their second attempt. - self.auth_error = None; - self.auth_in_progress = true; + // Optimistic: flip to pending immediately so the user + // doesn't have to wait for the daemon's `AuthStarted` + // event to see feedback. Also starts the client-side + // deadline clock — if the daemon never replies, + // `tick_auth_deadline` will unstick us. + self.set_auth_started(); PickerAction::StartAuth } _ => PickerAction::Ignored, @@ -838,6 +900,108 @@ mod tests { assert_eq!(st.auth_error.as_deref(), Some("timeout")); } + #[test] + fn a_keypress_populates_auth_started_at() { + let list = baked_list(); + let mut st = PickerState::new(&list, 0); + st.active_tab = PickerTab::Settings; + st.set_settings(snapshot_with_status(SettingsAuthStatus::LoggedOut)); + assert!(st.auth_started_at.is_none()); + st.handle_key(PickerKey::Char('a')); + assert!( + st.auth_started_at.is_some(), + "a press must start the deadline clock" + ); + } + + #[test] + fn tick_auth_deadline_is_noop_when_not_in_progress() { + let list = baked_list(); + let mut st = PickerState::new(&list, 0); + assert!(!st.tick_auth_deadline()); + assert!(st.auth_error.is_none()); + } + + #[test] + fn tick_auth_deadline_is_noop_before_timeout() { + let list = baked_list(); + let mut st = PickerState::new(&list, 0); + let t0 = Instant::now(); + st.set_auth_started_at(t0); + // Half a second in — still well inside the 5m30s window. + assert!(!st.tick_auth_deadline_at(t0 + Duration::from_millis(500))); + assert!(st.auth_in_progress); + } + + #[test] + fn tick_auth_deadline_trips_after_timeout() { + let list = baked_list(); + let mut st = PickerState::new(&list, 0); + let t0 = Instant::now(); + st.set_auth_started_at(t0); + let past = t0 + AUTH_CLIENT_TIMEOUT + Duration::from_secs(1); + assert!(st.tick_auth_deadline_at(past)); + assert!(!st.auth_in_progress, "deadline must clear the pending flag"); + assert!( + st.auth_error + .as_deref() + .is_some_and(|e| e.contains("no response from daemon")), + "deadline must surface a client-side failure reason" + ); + // Re-tick after the deadline fires must not double-fail. + assert!(!st.tick_auth_deadline_at(past + Duration::from_secs(60))); + } + + #[test] + fn daemon_reconnect_clears_pending_auth_with_benign_note() { + let list = baked_list(); + let mut st = PickerState::new(&list, 0); + st.active_tab = PickerTab::Settings; + st.set_settings(snapshot_with_status(SettingsAuthStatus::LoggedOut)); + st.handle_key(PickerKey::Char('a')); + assert!(st.auth_in_progress); + st.handle_daemon_reconnected(); + assert!(!st.auth_in_progress); + assert!( + st.auth_error + .as_deref() + .is_some_and(|e| e.contains("daemon reconnected")), + "reconnect note must surface a benign reason" + ); + assert!( + st.auth_started_at.is_none(), + "reconnect must clear the deadline clock" + ); + } + + #[test] + fn daemon_reconnect_is_noop_when_not_in_progress() { + let list = baked_list(); + let mut st = PickerState::new(&list, 0); + st.handle_daemon_reconnected(); + assert!(!st.auth_in_progress); + assert!(st.auth_error.is_none()); + } + + #[test] + fn set_auth_completed_clears_deadline_clock() { + let list = baked_list(); + let mut st = PickerState::new(&list, 0); + st.set_auth_started(); + assert!(st.auth_started_at.is_some()); + st.set_auth_completed(); + assert!(st.auth_started_at.is_none()); + } + + #[test] + fn set_auth_failed_clears_deadline_clock() { + let list = baked_list(); + let mut st = PickerState::new(&list, 0); + st.set_auth_started(); + st.set_auth_failed("nope".into()); + assert!(st.auth_started_at.is_none()); + } + #[test] fn second_a_press_after_failure_retries_and_clears_error() { let list = baked_list(); diff --git a/crates/clitunes/src/client/reconnect.rs b/crates/clitunes/src/client/reconnect.rs index a50e3c2..aad2667 100644 --- a/crates/clitunes/src/client/reconnect.rs +++ b/crates/clitunes/src/client/reconnect.rs @@ -15,6 +15,11 @@ const MAX_RECONNECT_ATTEMPTS: usize = 5; pub struct ReconnectingSession { session: ControlSession, socket_path: PathBuf, + /// Fires once per successful reconnect so the render loop can + /// rebuild any client-side state that assumed continuity with the + /// prior daemon process (e.g. pending Spotify auth). `None` for + /// headless callers that don't care. + reconnect_notify: Option>, } impl ReconnectingSession { @@ -23,9 +28,17 @@ impl ReconnectingSession { Ok(Self { session, socket_path, + reconnect_notify: None, }) } + /// Install a reconnect notifier. Called by the TUI bridge so the + /// render loop can react to socket drops that survived auto-spawn + /// recovery. Headless paths leave this unset. + pub fn set_reconnect_notifier(&mut self, tx: std::sync::mpsc::Sender<()>) { + self.reconnect_notify = Some(tx); + } + pub async fn send_verb(&mut self, verb: Verb) -> Result<()> { self.session.send_verb(verb).await } @@ -72,6 +85,11 @@ impl ReconnectingSession { Ok(session) => { self.session = session; tracing::info!(target: "clitunes", attempt, "reconnected to daemon"); + if let Some(tx) = self.reconnect_notify.as_ref() { + // Best-effort: the render loop may have + // dropped the receiver during shutdown. + let _ = tx.send(()); + } return Ok(()); } Err(e) => { diff --git a/crates/clitunes/src/client/render_loop.rs b/crates/clitunes/src/client/render_loop.rs index d80104b..fd3f889 100644 --- a/crates/clitunes/src/client/render_loop.rs +++ b/crates/clitunes/src/client/render_loop.rs @@ -90,6 +90,11 @@ pub struct RenderLoopConfig { pub consumer: Box, pub sample_rate: u32, pub event_rx: std::sync::mpsc::Receiver, + /// Synthetic notifications for conditions that don't map to a wire + /// `Event`. Currently used by the reconnect bridge to tell the + /// render loop "the daemon socket dropped and came back" so the + /// picker can reset any in-flight auth state. Drained every tick. + pub reconnect_rx: std::sync::mpsc::Receiver<()>, pub verb_tx: tokio::sync::mpsc::Sender, pub stop: Arc, pub measure_startup: bool, @@ -564,6 +569,7 @@ pub struct RenderLoop { fft: FftTap, sample_rate: u32, event_rx: std::sync::mpsc::Receiver, + reconnect_rx: std::sync::mpsc::Receiver<()>, verb_tx: tokio::sync::mpsc::Sender, stop: Arc, measure_startup: bool, @@ -577,6 +583,7 @@ impl RenderLoop { fft: FftTap::new(FFT_SIZE), sample_rate: config.sample_rate, event_rx: config.event_rx, + reconnect_rx: config.reconnect_rx, verb_tx: config.verb_tx, stop: config.stop, measure_startup: config.measure_startup, @@ -637,6 +644,26 @@ impl RenderLoop { state.handle_event(&ev, &self.stop); } + // Process reconnect notices from the bridge task. Each + // notice means the control socket dropped and came back — + // the new daemon (may be a fresh process) has no memory of + // our in-flight state, so clear any pending auth and ask + // for a fresh `ConfigSnapshot` to repaint the Settings tab. + let mut did_reconnect = false; + while self.reconnect_rx.try_recv().is_ok() { + did_reconnect = true; + } + if did_reconnect { + state.picker_state.handle_daemon_reconnected(); + if let Err(e) = self.verb_tx.try_send(Verb::ReadConfig) { + tracing::error!( + target: "clitunes", + error = %e, + "reconnect: failed to request fresh config snapshot" + ); + } + } + // Process user input. while let Ok(key) = key_rx.try_recv() { if state.handle_key(key, &self.verb_tx) { @@ -658,6 +685,15 @@ impl RenderLoop { } } + // Client-side deadline: unsticks the UI when the daemon + // died mid-flow and will never emit AuthCompleted/Failed. + if state.picker_state.tick_auth_deadline() { + tracing::warn!( + target: "clitunes", + "auth: client-side deadline elapsed with no daemon response" + ); + } + // Flush any debounced search query whose window has elapsed. if let Some((query, dirty_at)) = state.pending_search.as_ref() { if dirty_at.elapsed() >= SEARCH_DEBOUNCE { diff --git a/crates/clitunes/src/main.rs b/crates/clitunes/src/main.rs index a55f650..ce802ae 100644 --- a/crates/clitunes/src/main.rs +++ b/crates/clitunes/src/main.rs @@ -258,6 +258,12 @@ async fn run_full_tui( let (event_tx, event_rx) = std::sync::mpsc::channel::(); let (verb_tx, mut verb_rx) = tokio::sync::mpsc::channel::(64); + // Bridge → render-loop notifier for post-reconnect recovery. The + // render loop coalesces drained notices into a single recovery + // action per tick, so unbounded is harmless and spares us a + // BlockedOnFull edge case if the render loop stalls. + let (reconnect_tx, reconnect_rx) = std::sync::mpsc::channel::<()>(); + session.set_reconnect_notifier(reconnect_tx); let bridge_stop = Arc::clone(&stop); let bridge_state_path = state_path.clone(); @@ -324,6 +330,7 @@ async fn run_full_tui( consumer: Box::new(consumer), sample_rate, event_rx, + reconnect_rx, verb_tx, stop: render_stop, measure_startup: startup.is_some(),