From c27d4a5b16f792b7f0d5420807589f1918cf63dd Mon Sep 17 00:00:00 2001 From: Doll Date: Thu, 14 May 2026 01:17:25 -0500 Subject: [PATCH] fix(init): stop persisting TUI '(text)' placeholder and adjacent multi-agent push fixes (#739) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The init walkthrough was writing the UI placeholder "(text)" into hook-config.json for every ConfigType::String key, corrupting tracker_remote (push fails with RemoteMisconfigured('(text)')) and both sentinel.*.model keys on every new repo. populate_tracker_remote treated the corrupt value as a manual edit and refused to repair it, so even `crosslink init --force` left the config broken. While tracing the resulting "new repo never works" symptom, three adjacent multi-agent bugs surfaced that share the same blast radius (fresh-remote bootstrap not propagating). All four are fixed together so a freshly-inited repo actually round-trips between agents. * config_registry.rs (build_config): add `ConfigType::String => continue` so the UI placeholder is never persisted; apply_tui_choices now preserves the template defaults. * init/mod.rs (populate_tracker_remote): treat "(text)" as a corrupt template default to be auto-repaired alongside the literal "origin", so existing affected repos self-heal on `crosslink init --force`. Collapsed the new arm with the absent-key case (clippy). * sync/mod.rs (read_tracker_remote): defensive guard — when the on-disk value is "(text)", warn once with remediation instructions and fall back to "origin", so unpatched repos see actionable text instead of a generic push failure. * sync/cache.rs (push_hub_if_ahead) + commands/locks_cmd.rs (sync_cmd): sync now pushes the hub branch when local is ahead of remote OR the remote branch doesn't exist. Previously, pushes only happened lazily via SharedWriter::commit_*, leaving a race window where agent B's init_cache saw an empty remote (because A's bootstrap had never been pushed), created its own orphan branch, and then couldn't rebase onto A's eventual push. * sync/mod.rs (classify_push_failure): expanded NonFastForward substring set to cover "! [remote rejected]", "cannot lock ref", "incorrect old value provided", and "failed to push some refs" — the four shapes git emits for concurrent-ref-update races. Pre-fix, parallel lock claims both fell through to Other, both call sites returned LocalOnly, and both agents printed "Claimed lock on issue #1". * commands/cpitd.rs: add `#[serde(default, alias = "total_groups")]` to CpitdOutput::total_pairs to tolerate the cpitd 0.3.0 schema rename. Full schema migration for non-empty clone_reports is separate. Tests added (8): - walkthrough_core_tests::build_config_never_emits_string_typed_keys_with_text_placeholder - 3 × test_populate_tracker_remote_repairs_text_placeholder_{with_detected, without_remotes, to_non_origin} - test_read_tracker_remote_falls_back_when_corrupt_placeholder - classify_push_failure_concurrent_ref_lock_is_non_ff - classify_push_failure_remote_rejected_variant - 4 previously-failing smoke tests now pass: cpitd_scan_dry_run, parallel_lock_claim_one_winner, two_agents_create_issues, adversarial_same_issue_write_conflict_convergence Full suite green: 1883 lib + 2859 bin + 194 integ + 159 smoke pass, 0 failures, fmt clean, no new clippy errors introduced. Co-Authored-By: Claude Opus 4.7 (1M context) --- crosslink/src/commands/config_registry.rs | 63 +++++++++++++++++ crosslink/src/commands/cpitd.rs | 9 +++ crosslink/src/commands/init/mod.rs | 78 ++++++++++++++++++-- crosslink/src/commands/locks_cmd.rs | 15 ++++ crosslink/src/sync/cache.rs | 86 +++++++++++++++++++++++ crosslink/src/sync/mod.rs | 42 ++++++++++- crosslink/src/sync/tests.rs | 60 ++++++++++++++++ 7 files changed, 345 insertions(+), 8 deletions(-) diff --git a/crosslink/src/commands/config_registry.rs b/crosslink/src/commands/config_registry.rs index 98810f5a2..0e096765d 100644 --- a/crosslink/src/commands/config_registry.rs +++ b/crosslink/src/commands/config_registry.rs @@ -549,6 +549,17 @@ impl WalkthroughCore { "true" => serde_json::Value::Bool(true), _ => serde_json::Value::Bool(false), }, + // `ConfigType::String` options are a render-only + // placeholder (`"(text)"`) — the TUI has no + // text-input affordance today, so the placeholder + // is never an intentional value. Persisting it + // corrupts every `String`-typed key on every new + // repo (GH#739: `tracker_remote = "(text)"` made + // push fail with `RemoteMisconfigured`; the + // sentinel model keys silently broke too). Skip + // — `apply_tui_choices` then preserves the + // template default. + ConfigType::String => continue, _ => serde_json::Value::String(val_str.to_string()), }; result.insert(entry.key.to_string(), val); @@ -558,3 +569,55 @@ impl WalkthroughCore { result } } + +#[cfg(test)] +mod walkthrough_core_tests { + use super::*; + + /// Regression test for GH#739. + /// + /// `options_for_key` returns `vec!["(text)"]` for every + /// `ConfigType::String` key as a render-only UI placeholder. + /// Before the fix, `build_config` would dump that placeholder + /// into the persisted config; `apply_tui_choices` then overwrote + /// the template defaults, leaving every new repo with + /// `tracker_remote = "(text)"` and every push failing as + /// `RemoteMisconfigured`. The fix makes `build_config` skip + /// `ConfigType::String` entries entirely so the template + /// defaults survive. This test enforces that contract. + #[test] + fn build_config_never_emits_string_typed_keys_with_text_placeholder() { + let empty = serde_json::json!({}); + let core = WalkthroughCore::new(&empty, 0); + let built = core.build_config(); + + // Every String-typed key in the registry must be absent from + // the build output (so the embedded template's value survives). + let string_keys: Vec<&str> = REGISTRY + .iter() + .filter(|e| matches!(e.config_type, ConfigType::String)) + .map(|e| e.key) + .collect(); + assert!( + !string_keys.is_empty(), + "registry must contain at least one ConfigType::String key for this test to be meaningful" + ); + for key in &string_keys { + assert!( + !built.contains_key(*key), + "GH#739: build_config emitted a value for ConfigType::String key '{key}' \ + (would overwrite the template default with the UI placeholder '(text)')" + ); + } + + // Belt-and-braces: no value in the entire build output may + // be the literal "(text)" placeholder, regardless of type. + for (k, v) in &built { + assert_ne!( + v.as_str(), + Some("(text)"), + "GH#739: build_config emitted the '(text)' UI placeholder as the value for '{k}'" + ); + } + } +} diff --git a/crosslink/src/commands/cpitd.rs b/crosslink/src/commands/cpitd.rs index 201022f85..435924bc7 100644 --- a/crosslink/src/commands/cpitd.rs +++ b/crosslink/src/commands/cpitd.rs @@ -34,7 +34,16 @@ use crate::utils::format_issue_id; #[derive(Debug, Deserialize)] struct CpitdOutput { + #[serde(default)] clone_reports: Vec, + /// `total_pairs` is the field name in cpitd ≤ 0.2.x; cpitd 0.3.0 + /// renamed it to `total_groups` to reflect the move from "pairs of + /// files" to "N-way clone groups". Accept either via `serde(alias)` + /// so the empty-output path doesn't fail to parse. The full schema + /// migration for non-empty `clone_reports` (new `locations` model + /// vs. old `file_a`/`file_b`/`groups`) is a separate concern — + /// this annotation just unblocks the "no clones found" path. + #[serde(default, alias = "total_groups")] total_pairs: usize, } diff --git a/crosslink/src/commands/init/mod.rs b/crosslink/src/commands/init/mod.rs index 7ade786df..1d2a2cf86 100644 --- a/crosslink/src/commands/init/mod.rs +++ b/crosslink/src/commands/init/mod.rs @@ -341,10 +341,11 @@ fn populate_tracker_remote(config_path: &Path, project_root: &Path) -> Result<() .and_then(|v| v.as_str()) .map(String::from); - // Anything other than the literal template default is a manual + // Anything other than the literal template default ("origin") or + // the legacy corrupt placeholder "(text)" (GH#739) is a manual // edit — never touch it. if let Some(v) = ¤t { - if v != "origin" { + if v != "origin" && v != "(text)" { return Ok(()); } } @@ -364,10 +365,14 @@ fn populate_tracker_remote(config_path: &Path, project_root: &Path) -> Result<() (Some("origin"), None | Some("origin")) => return Ok(()), // Template-default + repo has a real non-origin remote: upgrade. (Some("origin"), Some(other)) => other.to_string(), - // Key absent (older config or hand-edit removed it): restore - // it from detection or fall back to the template default. - (None, Some(d)) => d.to_string(), - (None, None) => "origin".to_string(), + // Key absent (older config or hand-edit removed it) OR the + // corrupt "(text)" placeholder from pre-#739 builds. Both are + // treated as "no real value present": restore from detection, + // or fall back to the template default. No byte-equality + // preservation in the "(text)" case since the existing value + // is wrong by definition. + (Some("(text)") | None, Some(d)) => d.to_string(), + (Some("(text)") | None, None) => "origin".to_string(), // Manual non-default — short-circuited at the top of the // function. This arm is unreachable but exhausts the match. (Some(_), _) => return Ok(()), @@ -2862,4 +2867,65 @@ mod tests { populate_tracker_remote(&cfg, dir.path()).unwrap(); assert!(!cfg.exists(), "should not create config when missing"); } + + #[test] + fn test_populate_tracker_remote_repairs_text_placeholder_with_detected() { + // GH#739: older builds wrote the literal UI placeholder + // "(text)" into hook-config.json. populate_tracker_remote must + // treat that as repairable (not as a manual edit) and overwrite + // it with the detected remote. + let dir = test_dir(); + add_remote(dir.path(), "origin", "https://github.com/me/r.git"); + let cfg = write_minimal_hook_config(dir.path(), r#"{"tracker_remote": "(text)"}"#); + + populate_tracker_remote(&cfg, dir.path()).unwrap(); + + let after: serde_json::Value = + serde_json::from_str(&fs::read_to_string(&cfg).unwrap()).unwrap(); + assert_eq!( + after.get("tracker_remote").and_then(|v| v.as_str()), + Some("origin"), + "corrupt '(text)' placeholder must be repaired to the detected remote" + ); + } + + #[test] + fn test_populate_tracker_remote_repairs_text_placeholder_without_remotes() { + // GH#739: even when no git remotes are configured yet, the + // corrupt "(text)" placeholder must be replaced with the + // template default "origin" so subsequent operations don't + // fail with RemoteMisconfigured('(text)'). + let dir = test_dir(); + let cfg = write_minimal_hook_config(dir.path(), r#"{"tracker_remote": "(text)"}"#); + + populate_tracker_remote(&cfg, dir.path()).unwrap(); + + let after: serde_json::Value = + serde_json::from_str(&fs::read_to_string(&cfg).unwrap()).unwrap(); + assert_eq!( + after.get("tracker_remote").and_then(|v| v.as_str()), + Some("origin"), + "corrupt '(text)' must fall back to 'origin' when no remotes are detected" + ); + } + + #[test] + fn test_populate_tracker_remote_repairs_text_placeholder_to_non_origin() { + // GH#739: when the repo's only remote is non-origin, the + // repair path must use the detected name (same upgrade path + // as the template default). + let dir = test_dir(); + add_remote(dir.path(), "upstream", "https://example.com/r.git"); + let cfg = write_minimal_hook_config(dir.path(), r#"{"tracker_remote": "(text)"}"#); + + populate_tracker_remote(&cfg, dir.path()).unwrap(); + + let after: serde_json::Value = + serde_json::from_str(&fs::read_to_string(&cfg).unwrap()).unwrap(); + assert_eq!( + after.get("tracker_remote").and_then(|v| v.as_str()), + Some("upstream"), + "corrupt '(text)' must upgrade to the only non-origin remote" + ); + } } diff --git a/crosslink/src/commands/locks_cmd.rs b/crosslink/src/commands/locks_cmd.rs index 1c6c0c670..5ebc29a8e 100644 --- a/crosslink/src/commands/locks_cmd.rs +++ b/crosslink/src/commands/locks_cmd.rs @@ -414,6 +414,21 @@ pub fn sync_cmd(crosslink_dir: &Path, db: &Database) -> Result<()> { println!("Cache: {}", sync.cache_path().display()); + // Publish any local-only hub commits to the remote. Bootstrap, + // signing-key registration, and (pre-v2) layout migrations all + // produce commits in the hub cache that would otherwise sit + // unpushed until the operator happened to call a SharedWriter + // path (`create`, `comment`, etc.). On a fresh remote that + // creates a race window where a second agent's `init_cache` + // sees the empty remote, creates its own orphan branch, and + // then can never rebase onto the first agent's eventual push + // (the bootstrap commits look "previously applied"). Pushing + // here closes that window. See `push_hub_if_ahead` for the + // error-handling contract. + if let Err(e) = sync.push_hub_if_ahead() { + tracing::warn!("post-sync hub push failed: {e}"); + } + // Verify commit signature (SSH or GPG) let verification = sync.verify_locks_signature()?; match &verification { diff --git a/crosslink/src/sync/cache.rs b/crosslink/src/sync/cache.rs index df46f5d87..96f90c995 100644 --- a/crosslink/src/sync/cache.rs +++ b/crosslink/src/sync/cache.rs @@ -766,4 +766,90 @@ impl SyncManager { // happen. Treat as an error rather than silently returning Ok. bail!("Push loop completed without returning — this is a bug") } + + /// Push the local hub branch to the configured remote if (and only + /// if) there are local commits ahead of the remote, OR the remote + /// branch doesn't exist yet. + /// + /// Returns `Ok(true)` if a push was attempted and succeeded, + /// `Ok(false)` if no push was needed (or the push was classified + /// as a recoverable transient — offline, misconfigured, etc.). + /// Push failures are logged via the structured `PushFailure` + /// classifier (GH#586) and never bubble up: the local hub state + /// stays consistent, and the next `sync` retries. + /// + /// This exists because `sync_cmd` previously did not push at all — + /// pushes only happened lazily via `SharedWriter::commit_*` when + /// issues/comments/etc. were written. That left a multi-agent + /// race: agent A's first sync would bootstrap the hub locally + /// (registering signing keys, initializing layout) but never + /// publish those commits. If agent B inited between A's bootstrap + /// sync and A's first write, B's `init_cache` saw an empty remote, + /// created its own orphan branch, and then could not rebase onto + /// A's eventual push because the bootstrap commits looked + /// "already applied". Pushing at the end of sync closes that + /// window. + pub fn push_hub_if_ahead(&self) -> Result { + // No cache → nothing to push. + if !self.cache_dir.exists() { + return Ok(false); + } + + // Check whether the remote already has the branch. If it + // doesn't, we always want to push (even if `count_unpushed_commits` + // returns 0 — which it would, because the rev-list against a + // non-existent remote ref errors out). + let remote_has_hub = self + .git_in_repo(&["ls-remote", "--heads", &self.remote, HUB_BRANCH]) + .is_ok_and(|o| !String::from_utf8_lossy(&o.stdout).trim().is_empty()); + + if remote_has_hub && self.count_unpushed_commits() == 0 { + return Ok(false); + } + + // Use the hub write lock to serialize with concurrent writes + // (#457). Without this, a parallel SharedWriter::commit may + // race the push and produce a non-fast-forward. + let _lock_guard = self.acquire_lock()?; + + let push_result = self.git_in_cache(&["push", &self.remote, HUB_BRANCH]); + match push_result { + Ok(_) => Ok(true), + Err(e) => { + let err_str = e.to_string(); + match super::classify_push_failure(&err_str, &self.remote) { + // Transient. Local state is fine; next sync retries. + super::PushFailure::Offline => Ok(false), + // Surface loudly via the classifier; don't fail + // sync — it has already done useful local work + // (hydration, locks, etc.) that the user should + // benefit from. + f @ (super::PushFailure::RemoteMisconfigured { .. } + | super::PushFailure::AuthFailed + | super::PushFailure::Other(_)) => { + tracing::error!("{}", f.user_message("sync")); + Ok(false) + } + // Local is behind. The preceding `fetch` + rebase + // in `sync_cmd` already attempted to reconcile; if + // it couldn't, the local branch has diverged and + // needs human intervention. Warn and bail out of + // the push without failing the whole sync. + super::PushFailure::NonFastForward => { + tracing::warn!( + "push deferred: local hub branch has diverged from {}/{}. \ + Run `crosslink sync` again or resolve manually with \ + `cd {} && git log --oneline {}/{}..HEAD`.", + self.remote, + HUB_BRANCH, + self.cache_dir.display(), + self.remote, + HUB_BRANCH + ); + Ok(false) + } + } + } + } + } } diff --git a/crosslink/src/sync/mod.rs b/crosslink/src/sync/mod.rs index 1f026e72f..dec60aab5 100644 --- a/crosslink/src/sync/mod.rs +++ b/crosslink/src/sync/mod.rs @@ -103,8 +103,25 @@ pub(crate) fn classify_push_failure(err_str: &str, remote: &str) -> PushFailure }; } - // 3. Non-fast-forward — local is behind remote. - if err_str.contains("! [rejected]") || err_str.contains("non-fast-forward") { + // 3. Non-fast-forward — local is behind remote. Git emits this + // family in several shapes depending on whether the rejection + // came from the local pre-push check, the remote update hook, + // or the concurrent-update race (two clients pushing the same + // ref with the same expected old SHA). All variants map to + // the same recovery action (pull/rebase + retry), so they + // share a bucket. Pre-fix, the concurrent-claim race + // miscategorized "cannot lock ref" / "remote rejected" as + // `Other` and silently returned "saved locally" while both + // agents thought they won the lock. + let non_ff_markers = [ + "! [rejected]", + "! [remote rejected]", + "non-fast-forward", + "cannot lock ref", + "incorrect old value provided", + "failed to push some refs", + ]; + if non_ff_markers.iter().any(|m| err_str.contains(m)) { return PushFailure::NonFastForward; } @@ -162,6 +179,7 @@ impl PushFailure { /// `SyncManager::remote_exists()` to validate the remote. pub fn read_tracker_remote(crosslink_dir: &Path) -> String { static WARNED: Once = Once::new(); + static CORRUPT_WARNED: Once = Once::new(); let config_path = crosslink_dir.join("hook-config.json"); let configured = std::fs::read_to_string(&config_path) @@ -173,6 +191,26 @@ pub fn read_tracker_remote(crosslink_dir: &Path) -> String { }); if let Some(remote) = configured { + // GH#739 — pre-fix builds of the init walkthrough wrote the + // TUI placeholder "(text)" into hook-config.json for every + // `ConfigType::String` key. Detect that here and warn the + // user once, falling back to "origin" so sync doesn't bail + // with a (correct but unhelpful) RemoteMisconfigured error. + // The permanent fix is `crosslink config set tracker_remote + // ` or `crosslink init --force` (which now auto-repairs + // the corrupt placeholder). + if remote == "(text)" { + CORRUPT_WARNED.call_once(|| { + tracing::warn!( + "tracker_remote in {} is the corrupt placeholder \"(text)\" \ + (GH#739). Falling back to \"origin\". Repair with: \ + `crosslink config set tracker_remote ` or \ + `crosslink init --force`.", + config_path.display() + ); + }); + return "origin".to_string(); + } return remote; } diff --git a/crosslink/src/sync/tests.rs b/crosslink/src/sync/tests.rs index 1626e5533..fb727ca65 100644 --- a/crosslink/src/sync/tests.rs +++ b/crosslink/src/sync/tests.rs @@ -840,6 +840,30 @@ fn test_read_tracker_remote_custom_value() { assert_eq!(remote, "upstream"); } +#[test] +fn test_read_tracker_remote_falls_back_when_corrupt_placeholder() { + // GH#739: older builds of the init walkthrough wrote the literal + // UI placeholder "(text)" into hook-config.json for every + // ConfigType::String key. read_tracker_remote() must detect this + // corrupt sentinel and fall back to "origin" so push/sync don't + // fail with the (correct but unhelpful) RemoteMisconfigured("(text)") + // error. The permanent fix is `crosslink config set tracker_remote + // ` or `crosslink init --force`. + let dir = tempdir().unwrap(); + let crosslink_dir = dir.path().join(".crosslink"); + std::fs::create_dir_all(&crosslink_dir).unwrap(); + std::fs::write( + crosslink_dir.join("hook-config.json"), + r#"{"tracker_remote":"(text)"}"#, + ) + .unwrap(); + let remote = read_tracker_remote(&crosslink_dir); + assert_eq!( + remote, "origin", + "corrupt '(text)' placeholder must fall back to 'origin'" + ); +} + // ------------------------------------------------------------------ // SyncManager::new() with hook-config.json having a tracker_remote key // ------------------------------------------------------------------ @@ -3610,6 +3634,42 @@ fn classify_push_failure_non_fast_forward() { ); } +#[test] +fn classify_push_failure_concurrent_ref_lock_is_non_ff() { + // When two clients push the same ref simultaneously with the + // same expected old SHA, git's bare-repo update logic rejects + // the second push with "cannot lock ref" / "remote rejected" / + // "incorrect old value provided". Before the fix, the classifier + // miscategorized this as `Other`, the call site returned LocalOnly + // ("saved locally"), and both agents thought they had won the + // lock. Verify the rejection now lands in `NonFastForward` so + // `claim_lock_v2`'s retry path runs and one agent correctly + // returns `Contended`. + let stderr = "remote: error: cannot lock ref 'refs/heads/crosslink/hub': \ + is at abc123 but expected def456\n\ + To /tmp/bare.git\n\ + ! [remote rejected] crosslink/hub -> crosslink/hub \ + (incorrect old value provided)\n\ + error: failed to push some refs to '/tmp/bare.git'"; + assert_eq!( + classify_push_failure(stderr, "origin"), + PushFailure::NonFastForward, + "concurrent-ref-lock rejection must be classified as NonFastForward so the \ + caller's retry/recovery path runs" + ); +} + +#[test] +fn classify_push_failure_remote_rejected_variant() { + // Bare "! [remote rejected]" (without the "cannot lock ref" + // prefix) — also a non-fast-forward family. + let stderr = "To /tmp/r.git\n ! [remote rejected] main -> main (some hook reason)"; + assert_eq!( + classify_push_failure(stderr, "origin"), + PushFailure::NonFastForward + ); +} + #[test] fn classify_push_failure_auth_publickey() { // Common SSH auth-failure stderr.