From c48af1964edb5bc62bae30f2711ddcce211aa966 Mon Sep 17 00:00:00 2001 From: MonteYin Date: Wed, 25 Mar 2026 19:25:52 +0800 Subject: [PATCH 1/4] GTST-18 fix: handle untracked files in gitsift stage - Scan diff for untracked files before staging - Untracked file hunks staged via index.add_path() directly - Tracked file hunks staged via repo.apply() as before - Remaining untracked files added to index before apply() to prevent "index does not contain" failures - 2 new tests: stage with untracked present, stage untracked directly --- src/git/stage.rs | 327 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 244 insertions(+), 83 deletions(-) diff --git a/src/git/stage.rs b/src/git/stage.rs index 5143d6e..72802fa 100644 --- a/src/git/stage.rs +++ b/src/git/stage.rs @@ -1,5 +1,5 @@ use anyhow::{Context, Result}; -use git2::{ApplyLocation, ApplyOptions, Diff, DiffOptions, Repository}; +use git2::{ApplyLocation, ApplyOptions, Delta, Diff, DiffOptions, Repository}; use std::cell::RefCell; use std::collections::{HashMap, HashSet}; use std::path::Path; @@ -14,6 +14,69 @@ fn is_binary_delta(delta: &git2::DiffDelta) -> bool { delta.new_file().is_binary() || delta.old_file().is_binary() } +/// Scan the diff and return a map of hunk_id → (file_path, is_untracked). +fn scan_hunk_metadata(repo: &Repository) -> Result> { + let mut opts = DiffOptions::new(); + opts.context_lines(3); + opts.include_untracked(true); + opts.show_untracked_content(true); + + let diff = repo + .diff_index_to_workdir(None, Some(&mut opts)) + .context("failed to generate diff")?; + + let metadata: RefCell> = RefCell::new(HashMap::new()); + let current: RefCell<(String, bool)> = RefCell::new((String::new(), false)); + + diff.foreach( + &mut |delta, _| { + if is_binary_delta(&delta) { + *current.borrow_mut() = (String::new(), false); + return true; + } + let path = delta + .new_file() + .path() + .or_else(|| delta.old_file().path()) + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); + let untracked = delta.status() == Delta::Untracked; + *current.borrow_mut() = (path, untracked); + true + }, + None, + Some(&mut |_delta, hunk| { + let cur = current.borrow(); + let header = String::from_utf8_lossy(hunk.header()).trim().to_string(); + let id = hunk_id(&cur.0, hunk.old_start(), &header); + metadata + .borrow_mut() + .insert(id, (cur.0.clone(), cur.1)); + true + }), + None, + ) + .context("failed to scan hunk metadata")?; + + Ok(metadata.into_inner()) +} + +/// Stage untracked files by adding them to the index directly. +/// Returns number of files staged. +fn stage_untracked_files(repo: &Repository, paths: &[String]) -> Result { + if paths.is_empty() { + return Ok(0); + } + let mut index = repo.index().context("failed to open index")?; + for path in paths { + index + .add_path(Path::new(path)) + .with_context(|| format!("failed to add untracked file to index: {path}"))?; + } + index.write().context("failed to write index")?; + Ok(paths.len()) +} + /// Collected hunk data from a diff, used for patch reconstruction. struct RawHunk { file_path: String, @@ -197,103 +260,142 @@ pub fn stage_selection(repo_path: &Path, request: &StageRequest) -> Result = + request.hunk_ids.iter().map(|s| s.as_str()).collect(); - let diff = repo - .diff_index_to_workdir(None, Some(&mut opts)) - .context("failed to generate diff")?; + // Separate untracked file hunks from tracked file hunks. + let mut untracked_paths: Vec = Vec::new(); + let mut tracked_ids: HashSet = HashSet::new(); - // Scan available IDs - let available_ids: RefCell> = RefCell::new(HashSet::new()); - let scan_path = RefCell::new(String::new()); + for req_id in &unique_requested { + match hunk_metadata.get(*req_id) { + None => { + errors.push(format!("hunk ID not found: {req_id}")); + failed += 1; + } + Some((path, true)) => { + // Untracked file — stage via git add + if !untracked_paths.contains(path) { + untracked_paths.push(path.clone()); + } + } + Some((_, false)) => { + tracked_ids.insert(req_id.to_string()); + } + } + } - diff.foreach( - &mut |delta, _| { - // Clear path for binary files so no hunk IDs are generated for them - if is_binary_delta(&delta) { - *scan_path.borrow_mut() = String::new(); - } else { - let path = delta + // Stage untracked files directly + let untracked_staged = stage_untracked_files(&repo, &untracked_paths)?; + staged += untracked_staged; + + // For tracked hunks, add untracked files to index first so apply() doesn't choke, + // then apply with hunk filtering. + if !tracked_ids.is_empty() { + // Add any remaining untracked files to index to prevent apply() failure + let all_untracked: Vec = hunk_metadata + .values() + .filter(|(_, is_untracked)| *is_untracked) + .map(|(path, _)| path.clone()) + .collect::>() + .into_iter() + .collect(); + stage_untracked_files(&repo, &all_untracked)?; + + // Re-generate diff (now untracked files are in index) + let mut opts = DiffOptions::new(); + opts.context_lines(3); + + let diff = repo + .diff_index_to_workdir(None, Some(&mut opts)) + .context("failed to generate diff")?; + + // Scan available IDs from the new diff (tracked files only now) + let available_ids: RefCell> = RefCell::new(HashSet::new()); + let scan_path = RefCell::new(String::new()); + + diff.foreach( + &mut |delta, _| { + if is_binary_delta(&delta) { + *scan_path.borrow_mut() = String::new(); + } else { + let path = delta + .new_file() + .path() + .or_else(|| delta.old_file().path()) + .map(|p| p.to_string_lossy().into_owned()) + .unwrap_or_default(); + *scan_path.borrow_mut() = path; + } + true + }, + None, + Some(&mut |_delta, hunk| { + let path = scan_path.borrow(); + let header = String::from_utf8_lossy(hunk.header()).trim().to_string(); + let id = hunk_id(&path, hunk.old_start(), &header); + available_ids.borrow_mut().insert(id); + true + }), + None, + ) + .context("failed to scan diff for hunk IDs")?; + + // Filter tracked_ids to only those that exist in the new diff + let available_ids = available_ids.into_inner(); + let valid_ids: HashSet = tracked_ids + .iter() + .filter(|id| available_ids.contains(id.as_str())) + .cloned() + .collect(); + + if !valid_ids.is_empty() { + let current_path = RefCell::new(String::new()); + + let mut apply_opts = ApplyOptions::new(); + apply_opts.delta_callback(|delta| { + let d = match delta { + Some(d) => d, + None => return false, + }; + if is_binary_delta(&d) { + return false; + } + let path = d .new_file() .path() - .or_else(|| delta.old_file().path()) + .or_else(|| d.old_file().path()) .map(|p| p.to_string_lossy().into_owned()) .unwrap_or_default(); - *scan_path.borrow_mut() = path; - } - true - }, - None, - Some(&mut |_delta, hunk| { - let path = scan_path.borrow(); - let header = String::from_utf8_lossy(hunk.header()).trim().to_string(); - let id = hunk_id(&path, hunk.old_start(), &header); - available_ids.borrow_mut().insert(id); - true - }), - None, - ) - .context("failed to scan diff for hunk IDs")?; - - let available_ids = available_ids.into_inner(); - let unique_requested: HashSet<&str> = request.hunk_ids.iter().map(|s| s.as_str()).collect(); - - let mut valid_ids: HashSet = HashSet::new(); - for req_id in &unique_requested { - if available_ids.contains(*req_id) { - valid_ids.insert(req_id.to_string()); - } else { - errors.push(format!("hunk ID not found: {req_id}")); - failed += 1; + *current_path.borrow_mut() = path; + true + }); + apply_opts.hunk_callback(|hunk| { + let hunk = match hunk { + Some(h) => h, + None => return false, + }; + let path = current_path.borrow(); + let header = String::from_utf8_lossy(hunk.header()).trim().to_string(); + let id = hunk_id(&path, hunk.old_start(), &header); + valid_ids.contains(&id) + }); + + repo.apply(&diff, ApplyLocation::Index, Some(&mut apply_opts)) + .context("failed to apply selected hunks to index")?; + + staged += valid_ids.len(); } } - - if !valid_ids.is_empty() { - let current_path = RefCell::new(String::new()); - - let mut apply_opts = ApplyOptions::new(); - apply_opts.delta_callback(|delta| { - let d = match delta { - Some(d) => d, - None => return false, - }; - if is_binary_delta(&d) { - return false; - } - let path = d - .new_file() - .path() - .or_else(|| d.old_file().path()) - .map(|p| p.to_string_lossy().into_owned()) - .unwrap_or_default(); - *current_path.borrow_mut() = path; - true - }); - apply_opts.hunk_callback(|hunk| { - let hunk = match hunk { - Some(h) => h, - None => return false, - }; - let path = current_path.borrow(); - let header = String::from_utf8_lossy(hunk.header()).trim().to_string(); - let id = hunk_id(&path, hunk.old_start(), &header); - valid_ids.contains(&id) - }); - - repo.apply(&diff, ApplyLocation::Index, Some(&mut apply_opts)) - .context("failed to apply selected hunks to index")?; - - staged += valid_ids.len(); - } } // --- Line-level staging --- @@ -819,4 +921,63 @@ mod tests { let working_content = fs::read_to_string(dir.path().join("code.txt")).unwrap(); assert_eq!(index_content, working_content); } + + // ===== Untracked files (GTST-18) ===== + + #[test] + fn stage_with_untracked_files_present() { + let (dir, repo) = setup_two_hunk_repo(); + + // Add an untracked file alongside tracked changes + fs::write(dir.path().join("new_file.txt"), "brand new\n").unwrap(); + + // Should be able to stage tracked file hunks despite untracked file + let output = diff_unstaged(dir.path(), None).unwrap(); + let tracked_hunk = output + .files + .iter() + .find(|f| f.path == "file.txt") + .unwrap() + .hunks[0] + .id + .clone(); + + let request = StageRequest { + hunk_ids: vec![tracked_hunk], + line_selections: vec![], + }; + let result = stage_selection(dir.path(), &request).unwrap(); + assert_eq!(result.staged, 1); + assert_eq!(result.failed, 0); + assert!(result.errors.is_empty()); + + assert!(count_staged_hunks(&repo) >= 1); + } + + #[test] + fn stage_untracked_file_hunk_directly() { + let (dir, _repo) = setup_two_hunk_repo(); + + // Create untracked file + fs::write(dir.path().join("brand_new.py"), "print('hello')\n").unwrap(); + + // Get hunk ID for the untracked file + let output = diff_unstaged(dir.path(), None).unwrap(); + let new_file = output.files.iter().find(|f| f.path == "brand_new.py"); + assert!(new_file.is_some(), "untracked file should appear in diff"); + let new_hunk_id = new_file.unwrap().hunks[0].id.clone(); + + // Should be able to stage the untracked file's hunk + let request = StageRequest { + hunk_ids: vec![new_hunk_id], + line_selections: vec![], + }; + let result = stage_selection(dir.path(), &request).unwrap(); + assert_eq!(result.staged, 1); + assert_eq!(result.failed, 0); + + // Verify file is staged + let index_content = read_index_content(dir.path(), "brand_new.py"); + assert_eq!(index_content, "print('hello')\n"); + } } From 2c9bd79298113899657db5656f847449f949a1f4 Mon Sep 17 00:00:00 2001 From: MonteYin Date: Wed, 25 Mar 2026 19:36:26 +0800 Subject: [PATCH 2/4] GTST-18 fix: address review findings - Remove silent staging of all untracked files when only tracked hunks requested (was corrupting staging area) - Count untracked hunks instead of file paths for staged count - Reject line-level staging for untracked files with clear error message - Add assertion that untracked file is NOT in index after tracked-only staging --- src/git/stage.rs | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/src/git/stage.rs b/src/git/stage.rs index 72802fa..11782ad 100644 --- a/src/git/stage.rs +++ b/src/git/stage.rs @@ -274,6 +274,7 @@ pub fn stage_selection(repo_path: &Path, request: &StageRequest) -> Result = Vec::new(); + let mut untracked_hunk_count = 0usize; let mut tracked_ids: HashSet = HashSet::new(); for req_id in &unique_requested { @@ -287,6 +288,7 @@ pub fn stage_selection(repo_path: &Path, request: &StageRequest) -> Result { tracked_ids.insert(req_id.to_string()); @@ -295,23 +297,12 @@ pub fn stage_selection(repo_path: &Path, request: &StageRequest) -> Result = hunk_metadata - .values() - .filter(|(_, is_untracked)| *is_untracked) - .map(|(path, _)| path.clone()) - .collect::>() - .into_iter() - .collect(); - stage_untracked_files(&repo, &all_untracked)?; - - // Re-generate diff (now untracked files are in index) + // Diff excludes untracked files — apply() only sees tracked changes let mut opts = DiffOptions::new(); opts.context_lines(3); @@ -403,6 +394,16 @@ pub fn stage_selection(repo_path: &Path, request: &StageRequest) -> Result= 1); + + // Verify untracked file is NOT in the index (not silently staged) + let fresh_repo = Repository::open(dir.path()).unwrap(); + let index = fresh_repo.index().unwrap(); + assert!( + index.get_path(Path::new("new_file.txt"), 0).is_none(), + "untracked file should NOT be in index when only tracked hunk was staged" + ); } #[test] From 96d8503754eb58a28740bd90f22e0cb9b86fa065 Mon Sep 17 00:00:00 2001 From: MonteYin Date: Wed, 25 Mar 2026 19:39:32 +0800 Subject: [PATCH 3/4] GTST-18 test: add mixed tracked+untracked staging test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cover the case where both tracked and untracked hunk IDs are in the same request — verifies both are staged correctly. --- src/git/stage.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/src/git/stage.rs b/src/git/stage.rs index 11782ad..a165977 100644 --- a/src/git/stage.rs +++ b/src/git/stage.rs @@ -989,4 +989,44 @@ mod tests { let index_content = read_index_content(dir.path(), "brand_new.py"); assert_eq!(index_content, "print('hello')\n"); } + + #[test] + fn stage_mixed_tracked_and_untracked_hunks() { + let (dir, _repo) = setup_two_hunk_repo(); + + // Create untracked file + fs::write(dir.path().join("new.txt"), "new content\n").unwrap(); + + let output = diff_unstaged(dir.path(), None).unwrap(); + let tracked_id = output + .files + .iter() + .find(|f| f.path == "file.txt") + .unwrap() + .hunks[0] + .id + .clone(); + let untracked_id = output + .files + .iter() + .find(|f| f.path == "new.txt") + .unwrap() + .hunks[0] + .id + .clone(); + + // Stage both in one request + let request = StageRequest { + hunk_ids: vec![tracked_id, untracked_id], + line_selections: vec![], + }; + let result = stage_selection(dir.path(), &request).unwrap(); + assert_eq!(result.staged, 2); + assert_eq!(result.failed, 0); + assert!(result.errors.is_empty()); + + // Both files should be in index + let new_content = read_index_content(dir.path(), "new.txt"); + assert_eq!(new_content, "new content\n"); + } } From 4c1f08eb79b8113870d9b9fec7647afd32c00ebd Mon Sep 17 00:00:00 2001 From: MonteYin Date: Wed, 25 Mar 2026 19:41:47 +0800 Subject: [PATCH 4/4] GTST-18 chore: cargo fmt --- src/git/stage.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/git/stage.rs b/src/git/stage.rs index a165977..1f36d34 100644 --- a/src/git/stage.rs +++ b/src/git/stage.rs @@ -49,9 +49,7 @@ fn scan_hunk_metadata(repo: &Repository) -> Result Result = - request.hunk_ids.iter().map(|s| s.as_str()).collect(); + let unique_requested: HashSet<&str> = request.hunk_ids.iter().map(|s| s.as_str()).collect(); // Separate untracked file hunks from tracked file hunks. let mut untracked_paths: Vec = Vec::new();