From 7b8f01a6c2313c99c23ef7058725602de38017d1 Mon Sep 17 00:00:00 2001 From: Matt Van Horn <455140+mvanhorn@users.noreply.github.com> Date: Thu, 18 Jun 2026 01:11:19 -0700 Subject: [PATCH] fix(hid): make one-shot enumerate retry transport-agnostic The one-shot `enumerate()` free fn (CLI `list`/`diag`) retried only while a probed node reported `healthy == false`. A Unifying receiver defines `healthy = pairing_count.is_some()`, which stays true even when a device's arrival event lands after the 1.5s drain window and the device is dropped, so the retry never fired and the CLI returned a short device set. Separate the one-shot retry signal from per-node ledger health: add a `complete` flag reported alongside `healthy`. For Unifying, `complete` requires `paired.len() == pairing_count`, so a partial drain now triggers a retry while ledger replay (`healthy`) is unchanged. The retry loop stops on completeness, an unchanged inventory between consecutive attempts (so a chronically-short set stabilizes instead of burning every attempt), or the attempt cap. Add `PartialEq`/`Eq` derives to the inventory device structs so successive attempts can be diffed for the unchanged-stop fallback. Refs #277 --- crates/openlogi-core/src/device.rs | 80 ++++++++++- crates/openlogi-hid/src/inventory.rs | 200 +++++++++++++++++++++++---- 2 files changed, 249 insertions(+), 31 deletions(-) diff --git a/crates/openlogi-core/src/device.rs b/crates/openlogi-core/src/device.rs index 27f0b954..7d035346 100644 --- a/crates/openlogi-core/src/device.rs +++ b/crates/openlogi-core/src/device.rs @@ -146,14 +146,14 @@ pub enum BatteryStatus { Unknown, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct BatteryInfo { pub percentage: u8, pub level: BatteryLevel, pub status: BatteryStatus, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ReceiverInfo { pub name: String, pub vendor_id: u16, @@ -170,7 +170,7 @@ pub struct ReceiverInfo { /// Options+ asset registry's `modelId` (e.g. `"6b023"`) is the concatenation /// of an extended-model byte and one of these PIDs, so callers usually want /// to format `extended_model_id` + `model_ids[N]` to match. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct DeviceModelInfo { pub entity_count: u8, /// HID++ DeviceInformation serial number, when the device supports the @@ -204,7 +204,7 @@ impl DeviceModelInfo { clippy::struct_excessive_bools, reason = "bitfield mirroring HID++ DeviceInformation; transports are independent flags" )] -#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)] pub struct DeviceTransports { pub usb: bool, pub equad: bool, @@ -212,7 +212,7 @@ pub struct DeviceTransports { pub bluetooth: bool, } -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct PairedDevice { /// Receiver-assigned slot (1..=6 for Bolt). pub slot: u8, @@ -239,7 +239,7 @@ pub struct PairedDevice { /// field and variant *order*, so reordering, retyping, or wrapping any field /// in this tree is a wire-format change and requires a `PROTOCOL_VERSION` /// bump (guarded by `openlogi-agent-core/tests/wire_format.rs`). -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct DeviceInventory { pub receiver: ReceiverInfo, pub paired: Vec, @@ -247,7 +247,51 @@ pub struct DeviceInventory { #[cfg(test)] mod tests { - use super::DeviceKind; + use super::{ + BatteryInfo, BatteryLevel, BatteryStatus, Capabilities, DeviceInventory, DeviceKind, + DeviceModelInfo, DeviceTransports, PairedDevice, ReceiverInfo, + }; + + fn inventory(slot: u8, wpid: Option, battery_percentage: u8) -> DeviceInventory { + DeviceInventory { + receiver: ReceiverInfo { + name: "Logi Bolt Receiver".to_string(), + vendor_id: 0x046d, + product_id: 0xc548, + unique_id: Some("receiver-1".to_string()), + }, + paired: vec![PairedDevice { + slot, + codename: Some("MX Test".to_string()), + wpid, + kind: DeviceKind::Mouse, + online: true, + battery: Some(BatteryInfo { + percentage: battery_percentage, + level: BatteryLevel::Good, + status: BatteryStatus::Discharging, + }), + model_info: Some(DeviceModelInfo { + entity_count: 1, + serial_number: Some("serial-1".to_string()), + unit_id: [1, 2, 3, 4], + transports: DeviceTransports { + usb: true, + equad: true, + btle: false, + bluetooth: false, + }, + model_ids: [0xb023, 0, 0], + extended_model_id: 0x02, + }), + capabilities: Some(Capabilities { + buttons: true, + pointer: true, + lighting: false, + }), + }], + } + } #[test] fn registry_type_is_case_folded() { @@ -314,4 +358,26 @@ mod tests { Capabilities::default() ); } + + #[test] + fn device_inventory_equality_includes_nested_device_fields() { + let base = inventory(1, Some(0xb023), 86); + assert_eq!(base, base.clone()); + + assert_ne!( + base, + inventory(2, Some(0xb023), 86), + "slot changes must affect inventory equality" + ); + assert_ne!( + base, + inventory(1, Some(0xb024), 86), + "wireless product id changes must affect inventory equality" + ); + assert_ne!( + base, + inventory(1, Some(0xb023), 87), + "nested battery changes must affect inventory equality" + ); + } } diff --git a/crates/openlogi-hid/src/inventory.rs b/crates/openlogi-hid/src/inventory.rs index 3e71349b..929a35bd 100644 --- a/crates/openlogi-hid/src/inventory.rs +++ b/crates/openlogi-hid/src/inventory.rs @@ -263,25 +263,63 @@ pub async fn enumerate() -> Result, InventoryError> { // few times instead, reusing the same enumerator so its ledger accumulates a // snapshot a later attempt can replay and the opened channel stays warm. // #226's 5 s request timeout inside `HidppChannel::send` makes a dead probe - // fail fast, so a short bounded retry is cheap. + // fail fast, so a short bounded retry is cheap. Some transports can answer + // while still yielding a short device set (for example, a Unifying arrival + // event landing just after the drain window). When every node answered this + // cycle but that healthy pass is still short, two identical inventories mean + // the expected stable Unifying offline drain has settled. A failed/timed-out + // probe must keep using the full retry budget so the next attempt can reopen + // the channel and recover. let mut enumerator = Enumerator::default(); + let mut previous_inventories: Option> = None; let mut attempt = 1u8; loop { - let (inventories, all_healthy) = enumerator.enumerate_reporting_health().await?; - if all_healthy || attempt >= ONESHOT_ATTEMPTS { + let (inventories, all_complete, all_healthy) = + enumerator.enumerate_reporting_completeness().await?; + if one_shot_should_stop( + previous_inventories.as_deref(), + &inventories, + all_complete, + all_healthy, + attempt, + ) { return Ok(inventories); } debug!( attempt, - "one-shot enumerate saw an unhealthy node — retrying" + all_complete, + all_healthy, + "one-shot enumerate inventory incomplete or still changing — retrying" ); + // Only a healthy pass is valid evidence for the unchanged-inventory + // stop, so the equality check below ever compares two consecutive + // healthy snapshots. A failed/timed-out probe (replayed last-good or + // partial live result) is cleared so it can't count as one of the two + // "stable" reads and short-circuit a later healthy-but-short pass. + previous_inventories = if all_healthy { Some(inventories) } else { None }; tokio::time::sleep(ONESHOT_RETRY_DELAY).await; attempt += 1; } } +/// Stop the one-shot retry loop when the snapshot is complete, when a healthy +/// but short pass has stabilized (the expected Unifying offline-drain case), or +/// when the explicit attempt cap is reached. An unchanged inventory from a +/// failed probe is not stable evidence; it must keep retrying until the cap. +fn one_shot_should_stop( + previous: Option<&[DeviceInventory]>, + current: &[DeviceInventory], + all_complete: bool, + all_healthy: bool, + attempt: u8, +) -> bool { + all_complete + || (all_healthy && previous.is_some_and(|previous| previous == current)) + || attempt >= ONESHOT_ATTEMPTS +} + /// Attempts a one-shot [`enumerate`] makes before returning whatever it last -/// read, when a node keeps coming back unhealthy. +/// read, when an inventory keeps coming back incomplete or changing. const ONESHOT_ATTEMPTS: u8 = 4; /// Delay between one-shot [`enumerate`] retries. A first probe usually wakes an @@ -300,19 +338,22 @@ impl Enumerator { /// channel is reopened, so a transient HID++ glitch can't masquerade as /// "no devices" (#218) — see [`crate::node_ledger`]. pub async fn enumerate(&mut self) -> Result, InventoryError> { - self.enumerate_reporting_health().await.map(|(inv, _)| inv) + self.enumerate_reporting_completeness() + .await + .map(|(inv, _, _)| inv) } - /// [`Self::enumerate`] plus whether every probed node answered cleanly this - /// pass — `false` if any probe timed out, failed to open, or read short of a - /// receiver's pairing count. The polling watcher ignores the flag (the ledger - /// already replays a node through a transient miss), but the one-shot - /// [`enumerate`] free fn uses it to retry: a fresh `Enumerator` has no ledger - /// history to replay, so a transient miss would otherwise surface as an - /// empty/partial list (#218). - async fn enumerate_reporting_health( + /// [`Self::enumerate`] plus whether every probed node produced a complete + /// enough snapshot for the one-shot caller to stop early, and whether every + /// probed node answered this cycle. Completeness is separate from per-node + /// health: a node can answer cleanly enough for the ledger to accept its + /// live inventory while still reporting a known count/list shortfall that + /// the one-shot retry should give one more chance to settle. Only healthy + /// shortfalls can use the unchanged-inventory early stop; failed probes must + /// run through the retry budget so a later attempt can recover. + async fn enumerate_reporting_completeness( &mut self, - ) -> Result<(Vec, bool), InventoryError> { + ) -> Result<(Vec, bool, bool), InventoryError> { self.tick = self.tick.wrapping_add(1); let tick = self.tick; let candidates = enumerate_hidpp_devices().await?; @@ -379,8 +420,11 @@ impl Enumerator { let mut inventories = Vec::new(); let mut outcomes = Vec::new(); - // Whether every node answered cleanly this pass. Drives the one-shot - // `enumerate` retry; the ledger's own per-node replay is unaffected. + // Aggregates for the one-shot retry. `all_complete` can stop + // immediately; `all_healthy` gates the unchanged-inventory shortcut so + // failed probes keep retrying. The ledger's own per-node replay is + // governed by `probe.healthy`. + let mut all_complete = true; let mut all_healthy = true; for (node, result) in results { let probe = if let Ok(probe) = result { @@ -393,6 +437,7 @@ impl Enumerator { warn!(budget = ?PROBE_BUDGET, "device probe timed out — treating as a failed probe"); NodeProbe::failed() }; + all_complete &= probe.complete; all_healthy &= probe.healthy; outcomes.extend(probe.outcomes); let settled = self.ledger.settle(&node, probe.healthy, probe.inventory); @@ -404,6 +449,7 @@ impl Enumerator { // Nodes that wouldn't open this tick still replay their last snapshot // (they have no cached channel to evict). for node in open_failures { + all_complete = false; all_healthy = false; let settled = self.ledger.settle(&node, false, None); inventories.extend(settled.inventory); @@ -424,7 +470,7 @@ impl Enumerator { } } self.evict_unseen(&seen_keys); - Ok((inventories, all_healthy)) + Ok((inventories, all_complete, all_healthy)) } /// Drop cache entries for devices not seen this tick, after a short grace so @@ -452,11 +498,13 @@ impl Enumerator { /// One probed node's contribution this tick: its inventory (if any), whether /// the node actually answered — the ledger replays the last snapshot when it -/// didn't (see [`NodeLedger::settle`]) — and each device's cache contribution, -/// for the caller to apply and to drive eviction. +/// didn't (see [`NodeLedger::settle`]) — whether the one-shot retry can stop +/// early, and each device's cache contribution for the caller to apply and to +/// drive eviction. struct NodeProbe { inventory: Option, healthy: bool, + complete: bool, outcomes: Vec, } @@ -466,6 +514,7 @@ impl NodeProbe { Self { inventory: None, healthy: false, + complete: false, outcomes: Vec::new(), } } @@ -547,6 +596,7 @@ async fn probe_bolt_receiver( paired, }), healthy: complete, + complete, outcomes, } } @@ -614,12 +664,18 @@ async fn probe_unifying_receiver( ); } // Unlike Bolt, a count/list shortfall is *expected* here (offline paired - // devices aren't enumerable yet), so completeness can't ride on it. The - // health signal is the pairing-count register answering at all: that + // devices aren't enumerable yet), so ledger health can't ride on it. The + // ledger health signal is the pairing-count register answering at all: that // proves the receiver round-trip worked this cycle, while `None` (e.g. a // parked channel) is "couldn't fully check" — the ledger then replays the // last good snapshot instead of presenting a possibly-empty list (#218). + // + // The one-shot CLI path still needs a retry when the count says more + // devices may appear after a late arrival drain. Report that separately as + // `complete = false`; the unchanged-inventory fallback stops expected + // offline Unifying shortfalls after they stabilize. let healthy = pairing_count.is_some(); + let complete = pairing_count.is_some_and(|count| paired.len() == usize::from(count)); NodeProbe { inventory: Some(DeviceInventory { @@ -632,6 +688,7 @@ async fn probe_unifying_receiver( paired, }), healthy, + complete, outcomes, } } @@ -760,6 +817,7 @@ async fn probe_direct( return NodeProbe { inventory: None, healthy: walk_succeeded, + complete: walk_succeeded, outcomes: vec![CacheOutcome::Unkeyed], }; } @@ -792,6 +850,7 @@ async fn probe_direct( NodeProbe { inventory: Some(inventory), healthy: true, + complete: true, outcomes: vec![outcome], } } @@ -1072,10 +1131,12 @@ mod tests { use std::collections::HashSet; use super::{ - CACHE_MISS_GRACE, CacheKey, Cached, Enumerator, ProbedFeatures, REFRESH_TICKS, - UnifiedBatteryFeature, battery_feature_index, is_stale, + CACHE_MISS_GRACE, CacheKey, Cached, Enumerator, ONESHOT_ATTEMPTS, ProbedFeatures, + REFRESH_TICKS, UnifiedBatteryFeature, battery_feature_index, is_stale, + one_shot_should_stop, }; use hidpp::feature::CreatableFeature as _; + use openlogi_core::device::{DeviceInventory, DeviceKind, PairedDevice, ReceiverInfo}; fn cache_entry(probed_tick: u64) -> Cached { Cached { @@ -1085,6 +1146,31 @@ mod tests { } } + fn inventory(slots: &[u8]) -> Vec { + vec![DeviceInventory { + receiver: ReceiverInfo { + name: "Unifying Receiver".to_string(), + vendor_id: 0x046d, + product_id: 0xc52b, + unique_id: Some("receiver-1".to_string()), + }, + paired: slots + .iter() + .copied() + .map(|slot| PairedDevice { + slot, + codename: Some(format!("device-{slot}")), + wpid: Some(0xb000 + u16::from(slot)), + kind: DeviceKind::Mouse, + online: true, + battery: None, + model_info: None, + capabilities: None, + }) + .collect(), + }] + } + #[test] fn cache_entry_survives_grace_then_evicts() { let mut e = Enumerator::default(); @@ -1163,4 +1249,70 @@ mod tests { assert_eq!(battery_feature_index([0x0001, 0x2201, 0x1b04]), None); assert_eq!(battery_feature_index([]), None); } + + #[test] + fn one_shot_retry_stops_when_first_attempt_is_complete() { + let current = inventory(&[1, 2]); + + assert!( + one_shot_should_stop(None, ¤t, true, true, 1), + "complete inventories keep the one-pass happy path" + ); + } + + #[test] + fn one_shot_retry_waits_for_healthy_incomplete_inventory_to_stabilize() { + let partial = inventory(&[1]); + let full = inventory(&[1, 2]); + + assert!( + !one_shot_should_stop(None, &partial, false, true, 1), + "the first incomplete pass has no previous inventory to compare" + ); + assert!( + !one_shot_should_stop(Some(partial.as_slice()), &full, false, true, 2), + "a changed inventory should get another retry window" + ); + assert!( + one_shot_should_stop(Some(full.as_slice()), &full, false, true, 3), + "once the returned inventory stabilizes, retrying stops" + ); + } + + #[test] + fn one_shot_retry_stops_on_unchanged_incomplete_inventory() { + let partial = inventory(&[1]); + + assert!( + one_shot_should_stop(Some(partial.as_slice()), &partial, false, true, 2), + "stable partial inventories should not burn every retry attempt" + ); + } + + #[test] + fn one_shot_retry_keeps_unchanged_inventory_after_unhealthy_probe() { + let partial = inventory(&[1]); + + assert!( + !one_shot_should_stop(Some(partial.as_slice()), &partial, false, false, 2), + "unchanged replay after a failed probe must keep retrying before the cap" + ); + } + + #[test] + fn one_shot_retry_stops_at_attempt_cap_when_inventory_keeps_changing() { + let previous = inventory(&[1]); + let current = inventory(&[1, 2]); + + assert!( + one_shot_should_stop( + Some(previous.as_slice()), + ¤t, + false, + false, + ONESHOT_ATTEMPTS + ), + "the retry loop must remain bounded even if the inventory changes every time" + ); + } }