From c055568e0d784ed5be75ef8c5c56a460e4cb7a31 Mon Sep 17 00:00:00 2001 From: MonteYin Date: Mon, 30 Mar 2026 16:57:47 +0800 Subject: [PATCH 1/2] GTST-21 feat: add hunk-level checkout and diff --staged support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add selective discard of changes at hunk granularity: - `gitsift checkout --hunk-ids` discards unstaged hunks (workdir → index) - `gitsift checkout --staged --hunk-ids` discards staged hunks (index → HEAD) - `gitsift diff --staged` shows staged changes (HEAD vs index) - Protocol support: checkout method with staged param, diff staged param - Handles untracked files (deletion) and staged new files (index removal) New file: src/git/checkout.rs (reverse-patch reconstruction + apply) Updated: cli, models, diff, output, toon, protocol, main, docs, skill, tests --- README.md | 33 +- skills/gitsift-staging/SKILL.md | 62 +++- src/cli.rs | 20 +- src/git/checkout.rs | 639 ++++++++++++++++++++++++++++++++ src/git/diff.rs | 156 ++++++++ src/git/mod.rs | 1 + src/main.rs | 22 +- src/models.rs | 91 ++++- src/output.rs | 10 +- src/protocol.rs | 14 +- src/toon.rs | 17 + tests/cli.rs | 204 ++++++++++ tests/edge_cases.rs | 87 +++++ 13 files changed, 1336 insertions(+), 20 deletions(-) create mode 100644 src/git/checkout.rs diff --git a/README.md b/README.md index 6a42855..edd96c4 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,13 @@ # gitsift -Git hunk sifter for code agents. A selective staging tool (`git add -p` replacement) designed for CLI agents like Claude Code and Codex. +Git hunk sifter for code agents. A selective staging and checkout tool (`git add -p` / `git checkout -p` replacement) designed for CLI agents like Claude Code and Codex. ## Features - **Hunk-level staging** — stage entire hunks by ID - **Line-level staging** — stage individual lines within a hunk via patch reconstruction +- **Hunk-level checkout** — discard unstaged or staged changes per hunk +- **Staged diff** — view staged changes (HEAD vs index) with `diff --staged` - **Compact output** — token-efficient default format (~40% smaller than JSON), inspired by [TOON](https://toonformat.dev/) - **JSON output** — full structured diff output with file/hunk/line metadata - **JSON-lines protocol** — persistent stdin/stdout mode for agent sessions @@ -51,12 +53,24 @@ gitsift diff --format json # Filter by file gitsift diff --file src/main.rs +# List staged changes (HEAD vs index) +gitsift diff --staged + # Stage hunks by ID gitsift stage --hunk-ids abc123,def456 # Stage via JSON on stdin (supports line-level selections) echo '{"hunk_ids": ["abc123"]}' | gitsift stage --from-stdin +# Discard unstaged hunks (revert workdir → index) +gitsift checkout --hunk-ids abc123,def456 + +# Discard staged hunks (revert index → HEAD) +gitsift checkout --staged --hunk-ids abc123 + +# Discard via JSON on stdin +echo '{"hunk_ids": ["abc123"]}' | gitsift checkout --from-stdin + # Show staging status gitsift status ``` @@ -73,7 +87,10 @@ Send JSON requests on stdin, receive JSON responses on stdout: ```json {"method": "diff", "params": {"file": "src/main.rs"}} +{"method": "diff", "params": {"staged": true}} {"method": "stage", "params": {"hunk_ids": ["abc123"]}} +{"method": "checkout", "params": {"hunk_ids": ["abc123"]}} +{"method": "checkout", "params": {"hunk_ids": ["abc123"], "staged": true}} {"method": "status"} ``` @@ -111,13 +128,20 @@ files[1]: ## Agent integration -gitsift is designed for the following workflow: +gitsift is designed for the following workflows: -1. Agent calls `gitsift diff` to inspect available changes +**Selective staging:** +1. Agent calls `gitsift diff` to inspect unstaged changes 2. Agent selects hunks/lines to stage based on the structured output 3. Agent calls `gitsift stage --hunk-ids ` or pipes a `StageRequest` via `--from-stdin` 4. Agent calls `gitsift status` to verify staging result +**Selective discard:** +1. Agent calls `gitsift diff` (or `gitsift diff --staged`) to inspect changes +2. Agent selects hunks to discard +3. Agent calls `gitsift checkout --hunk-ids ` (or `--staged` for staged changes) +4. Agent calls `gitsift diff` to verify the changes were discarded + For persistent sessions, use `gitsift protocol` to avoid process startup overhead. ## Architecture @@ -129,8 +153,9 @@ src/ ├── models.rs # Serde types: Hunk, HunkLine, DiffOutput, StageRequest, Response ├── git/ │ ├── mod.rs # shared git2 helpers (diff_opts, delta_path, hunk_header, etc.) -│ ├── diff.rs # diff engine: git2 diff_index_to_workdir → Vec +│ ├── diff.rs # diff engine: diff_unstaged (index→workdir), diff_staged (HEAD→index) │ ├── stage.rs # staging: hunk-level via ApplyOptions, line-level via patch reconstruction +│ ├── checkout.rs # checkout (discard): hunk-level reverse-patch for unstaged and staged changes │ └── status.rs # staging status summary ├── protocol.rs # stdin/stdout JSON-lines request/response loop ├── toon.rs # compact output format (TOON-inspired, token-efficient) diff --git a/skills/gitsift-staging/SKILL.md b/skills/gitsift-staging/SKILL.md index 748cbe9..4f9884b 100644 --- a/skills/gitsift-staging/SKILL.md +++ b/skills/gitsift-staging/SKILL.md @@ -1,12 +1,12 @@ --- name: gitsift-staging -description: "Selective git staging with gitsift — stage specific hunks or individual lines instead of entire files. Use this skill whenever the user needs to split changes into multiple commits, stage only part of their work, create atomic commits from a large diff, cherry-pick specific changes to stage, or do anything that resembles `git add -p` but with structured control. Trigger phrases include: 'stage only the bug fix', 'commit these separately', 'only stage lines X-Y', 'split this into two commits', 'don't stage everything', 'only commit the tests', 'separate the formatting from the logic', 'partial staging', 'I want to pick which changes to commit', 'selective commit'." +description: "Selective git staging and checkout with gitsift — stage or discard specific hunks or individual lines instead of entire files. Use this skill whenever the user needs to split changes into multiple commits, stage only part of their work, create atomic commits from a large diff, cherry-pick specific changes to stage, discard specific hunks, undo certain changes selectively, revert part of their work, or do anything that resembles `git add -p` or `git checkout -p` but with structured control. Trigger phrases include: 'stage only the bug fix', 'commit these separately', 'only stage lines X-Y', 'split this into two commits', 'don't stage everything', 'only commit the tests', 'separate the formatting from the logic', 'partial staging', 'I want to pick which changes to commit', 'selective commit', 'discard this hunk', 'revert only that change', 'undo the formatting changes', 'unstage this hunk', 'throw away this change'." user_invocable: true --- -# Selective Staging with gitsift +# Selective Staging and Checkout with gitsift -gitsift is a CLI tool that replaces `git add -p` with structured output. It lets you stage individual hunks or even specific lines from unstaged changes — perfect for creating clean, atomic commits. +gitsift is a CLI tool that replaces `git add -p` and `git checkout -p` with structured output. It lets you stage or discard individual hunks or even specific lines — perfect for creating clean, atomic commits and selectively reverting changes. **Output formats**: gitsift supports two formats via `--format`: - `toon` — compact, token-efficient format (default). ~40% fewer tokens than JSON. @@ -18,7 +18,9 @@ Run `gitsift --version` to confirm it's installed. If not, see https://github.co ## Core Workflow -The workflow is always: **diff → decide → stage → verify → commit**. +### Staging workflow: **diff → decide → stage → verify → commit** + +### Checkout (discard) workflow: **diff → decide → checkout → verify** ### 1. See what changed @@ -26,13 +28,18 @@ The workflow is always: **diff → decide → stage → verify → commit**. gitsift diff ``` -This returns all unstaged changes in compact (toon) format. Each change is organized as files → hunks → lines, and every hunk has a unique ID you'll use for staging. +This returns all unstaged changes in compact (toon) format. Each change is organized as files → hunks → lines, and every hunk has a unique ID you'll use for staging or checkout. To focus on a specific file: ```bash gitsift diff --file src/main.rs ``` +To see staged changes (what's in the index vs HEAD): +```bash +gitsift diff --staged +``` + ### 2. Decide what to stage Look at the diff output and identify which hunks or lines belong together logically. Think about what makes a clean, atomic commit — group related changes together. @@ -107,7 +114,26 @@ echo '{"line_selections": [{"hunk_id": "59a9050fd4195c94", "line_indices": [1, 2 **Important**: you cannot mix `hunk_ids` and `line_selections` in one request. If you need both, make two separate calls. -### 5. Verify and commit +### 5. Discard unwanted hunks (checkout) + +To discard specific unstaged hunks (revert working tree → index): +```bash +gitsift checkout --hunk-ids abc123,def456 +``` + +To discard specific staged hunks (revert index → HEAD): +```bash +gitsift checkout --staged --hunk-ids abc123 +``` + +To discard via JSON stdin: +```bash +echo '{"hunk_ids": ["abc123"]}' | gitsift checkout --from-stdin +``` + +**Note**: Discarding an untracked file deletes it from the working tree. Discarding a staged new file removes it from the index (file stays on disk as untracked). + +### 6. Verify and commit Check what's staged vs unstaged: ```bash @@ -119,7 +145,7 @@ Then commit as usual: git commit -m "your message" ``` -If there are more changes to stage for a second commit, go back to step 1 — you need to re-diff because hunk IDs change after staging. +If there are more changes to stage for a second commit, go back to step 1 — you need to re-diff because hunk IDs change after staging or checkout. ## Protocol Mode (persistent sessions) @@ -131,7 +157,10 @@ gitsift protocol --repo . ```json {"method": "diff", "params": {"file": "src/main.rs"}} +{"method": "diff", "params": {"staged": true}} {"method": "stage", "params": {"hunk_ids": ["abc123"]}} +{"method": "checkout", "params": {"hunk_ids": ["abc123"]}} +{"method": "checkout", "params": {"hunk_ids": ["abc123"], "staged": true}} {"method": "status"} ``` @@ -151,7 +180,7 @@ gitsift stage --hunk-ids **Changes close together merge into one hunk.** If your modifications are within 3 lines of each other, git combines them into a single hunk. You can't split them with hunk-level staging — use line-level staging (`--from-stdin` with `line_selections`) instead. Check the hunk's `lines` array to pick exactly which delete/insert pairs to include. -**Re-diff after every stage.** Once you stage something, the remaining hunks shift and their IDs change. Always run `gitsift diff` again before the next `gitsift stage`. Using stale IDs will fail with "hunk ID not found". +**Re-diff after every stage or checkout.** Once you stage or checkout something, the remaining hunks shift and their IDs change. Always run `gitsift diff` again before the next operation. Using stale IDs will fail with "hunk ID not found". **One mode per request.** Either `hunk_ids` or `line_selections`, never both at once. The API rejects mixed requests — use separate calls if you need both. @@ -181,7 +210,7 @@ git commit -m "feat: add retry logic for API calls" ## Response Format -**Compact (toon) format** — default: +**Stage result — compact (toon) format** (default): ``` version: 1 ok: true @@ -189,11 +218,24 @@ staged: 2 failed: 0 ``` -**JSON format** — use `--format json`: +**Stage result — JSON format** (`--format json`): ```json {"version": 1, "ok": true, "data": {"staged": 2, "failed": 0}} ``` +**Checkout result — compact (toon) format** (default): +``` +version: 1 +ok: true +discarded: 1 +failed: 0 +``` + +**Checkout result — JSON format** (`--format json`): +```json +{"version": 1, "ok": true, "data": {"discarded": 1, "failed": 0}} +``` + On error (JSON): ```json {"version": 1, "ok": false, "error": "description of what went wrong"} diff --git a/src/cli.rs b/src/cli.rs index 3483788..728df11 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -19,11 +19,15 @@ pub struct Cli { #[derive(Subcommand)] pub enum Commands { - /// List all unstaged hunks + /// List unstaged hunks (or staged hunks with --staged) Diff { /// Filter by file path #[arg(short, long)] file: Option, + + /// Show staged changes (HEAD vs index) instead of unstaged + #[arg(long)] + staged: bool, }, /// Stage selected hunks or lines Stage { @@ -35,6 +39,20 @@ pub enum Commands { #[arg(long)] from_stdin: bool, }, + /// Discard selected hunks (unstaged by default, or staged with --staged) + Checkout { + /// Hunk IDs to discard (comma-separated) + #[arg(long, value_delimiter = ',')] + hunk_ids: Option>, + + /// Read JSON selection from stdin + #[arg(long)] + from_stdin: bool, + + /// Discard staged changes (index → HEAD) instead of unstaged (workdir → index) + #[arg(long)] + staged: bool, + }, /// Show staging status summary Status, /// Enter stdin/stdout JSON-lines protocol mode diff --git a/src/git/checkout.rs b/src/git/checkout.rs new file mode 100644 index 0000000..ba6e7ef --- /dev/null +++ b/src/git/checkout.rs @@ -0,0 +1,639 @@ +use anyhow::{Context, Result}; +use git2::{ApplyLocation, Diff, Repository}; +use std::cell::RefCell; +use std::collections::{HashMap, HashSet}; +use std::path::Path; + +use super::diff::hunk_id; +use super::{delta_path, hunk_header, is_binary_delta}; +use crate::models::{CheckoutRequest, CheckoutResult, HunkLine, LineTag}; + +/// Collected hunk data from a diff, used for reverse-patch reconstruction. +struct RawHunk { + file_path: String, + old_start: u32, + new_start: u32, + lines: Vec, +} + +/// Reconstruct a reverse unified diff patch for a single hunk. +/// +/// Swaps the direction: `-` lines become `+` lines (restore old content), +/// `+` lines become `-` lines (remove new content), context stays as-is. +/// The @@ header swaps old/new positions accordingly. +fn reconstruct_reverse_patch(hunk: &RawHunk) -> String { + use std::fmt::Write; + + let mut patch_lines: Vec = Vec::new(); + let mut old_count: u32 = 0; + let mut new_count: u32 = 0; + + for line in &hunk.lines { + match line.tag { + LineTag::Equal => { + patch_lines.push(format!(" {}", line.content)); + old_count += 1; + new_count += 1; + } + LineTag::Delete => { + // Original `-` becomes `+` in the reverse (restore deleted content) + patch_lines.push(format!("+{}", line.content)); + new_count += 1; + } + LineTag::Insert => { + // Original `+` becomes `-` in the reverse (remove inserted content) + patch_lines.push(format!("-{}", line.content)); + old_count += 1; + } + } + } + + // In the reverse patch: old side = new_start from original, new side = old_start from original + let header = + format!("@@ -{},{} +{},{} @@", hunk.new_start, old_count, hunk.old_start, new_count); + + let mut patch = String::new(); + let _ = writeln!(patch, "diff --git a/{0} b/{0}", hunk.file_path); + let _ = writeln!(patch, "--- a/{}", hunk.file_path); + let _ = writeln!(patch, "+++ b/{}", hunk.file_path); + patch.push_str(&header); + patch.push('\n'); + for line in &patch_lines { + patch.push_str(line); + if !line.ends_with('\n') { + patch.push('\n'); + } + } + + patch +} + +/// Metadata about a hunk: file path and whether the file is untracked/newly-added. +struct HunkMeta { + file_path: String, + is_untracked: bool, + is_added: bool, +} + +/// Scan the unstaged diff and return hunk metadata. +fn scan_unstaged_hunk_metadata(repo: &Repository) -> Result> { + let mut opts = super::diff_opts_with_untracked(); + + let diff = + repo.diff_index_to_workdir(None, Some(&mut opts)).context("failed to generate diff")?; + + let metadata: RefCell> = RefCell::new(HashMap::new()); + let current: RefCell<(String, bool, bool)> = RefCell::new((String::new(), false, false)); + + diff.foreach( + &mut |delta, _| { + if is_binary_delta(&delta) { + *current.borrow_mut() = (String::new(), false, false); + return true; + } + let path = delta_path(&delta); + let untracked = delta.status() == git2::Delta::Untracked; + let added = delta.status() == git2::Delta::Added; + *current.borrow_mut() = (path, untracked, added); + true + }, + None, + Some(&mut |_delta, hunk| { + let cur = current.borrow(); + let header = hunk_header(&hunk); + let id = hunk_id(&cur.0, hunk.old_start(), &header); + metadata.borrow_mut().insert( + id, + HunkMeta { file_path: cur.0.clone(), is_untracked: cur.1, is_added: cur.2 }, + ); + true + }), + None, + ) + .context("failed to scan unstaged hunk metadata")?; + + Ok(metadata.into_inner()) +} + +/// Scan the staged diff (HEAD vs index) and return hunk metadata. +fn scan_staged_hunk_metadata(repo: &Repository) -> Result> { + let head_tree = repo.head().ok().and_then(|h| h.peel_to_tree().ok()); + + let mut opts = super::diff_opts_tracked_only(); + + let diff = repo + .diff_tree_to_index(head_tree.as_ref(), None, Some(&mut opts)) + .context("failed to generate staged diff")?; + + let metadata: RefCell> = RefCell::new(HashMap::new()); + let current: RefCell<(String, bool)> = RefCell::new((String::new(), false)); + + diff.foreach( + &mut |delta, _| { + if is_binary_delta(&delta) { + *current.borrow_mut() = (String::new(), false); + return true; + } + let path = delta_path(&delta); + let is_added = delta.status() == git2::Delta::Added; + *current.borrow_mut() = (path, is_added); + true + }, + None, + Some(&mut |_delta, hunk| { + let cur = current.borrow(); + let header = hunk_header(&hunk); + let id = hunk_id(&cur.0, hunk.old_start(), &header); + metadata.borrow_mut().insert( + id, + HunkMeta { file_path: cur.0.clone(), is_untracked: false, is_added: cur.1 }, + ); + true + }), + None, + ) + .context("failed to scan staged hunk metadata")?; + + Ok(metadata.into_inner()) +} + +/// Collect full hunk data from a diff, keyed by hunk ID. +fn collect_hunks_from_diff(diff: &git2::Diff<'_>) -> Result> { + let state = RefCell::new(CollectState { + hunks: HashMap::new(), + current_path: String::new(), + current_hunk_id: None, + }); + + diff.foreach( + &mut |delta, _| { + let mut s = state.borrow_mut(); + if is_binary_delta(&delta) { + s.current_path.clear(); + s.current_hunk_id = None; + } else { + s.current_path = delta_path(&delta); + s.current_hunk_id = None; + } + true + }, + None, + Some(&mut |_delta, hunk| { + let mut s = state.borrow_mut(); + let header = hunk_header(&hunk); + let id = hunk_id(&s.current_path, hunk.old_start(), &header); + let raw = RawHunk { + file_path: s.current_path.clone(), + old_start: hunk.old_start(), + new_start: hunk.new_start(), + lines: Vec::new(), + }; + s.hunks.insert(id.clone(), raw); + s.current_hunk_id = Some(id); + true + }), + Some(&mut |_delta, _hunk, line| { + let mut s = state.borrow_mut(); + let tag = LineTag::from_origin(line.origin()); + let content = String::from_utf8_lossy(line.content()).into_owned(); + let hunk_line = HunkLine { + tag, + content, + old_lineno: line.old_lineno(), + new_lineno: line.new_lineno(), + }; + if let Some(id) = s.current_hunk_id.clone() + && let Some(raw) = s.hunks.get_mut(&id) + { + raw.lines.push(hunk_line); + } + true + }), + ) + .context("failed to iterate diff")?; + + Ok(state.into_inner().hunks) +} + +struct CollectState { + hunks: HashMap, + current_path: String, + current_hunk_id: Option, +} + +/// Discard selected unstaged hunks (revert working tree → index state). +pub fn checkout_unstaged(repo_path: &Path, request: &CheckoutRequest) -> Result { + if request.hunk_ids.is_empty() { + return Ok(CheckoutResult { + discarded: 0, + failed: 0, + errors: vec!["no hunk IDs provided".into()], + }); + } + + let repo = Repository::open(repo_path).context("failed to open git repository")?; + let hunk_metadata = scan_unstaged_hunk_metadata(&repo)?; + + let unique_requested: HashSet<&str> = request.hunk_ids.iter().map(String::as_str).collect(); + + let mut discarded = 0usize; + let mut failed = 0usize; + let mut errors = Vec::new(); + + // Separate untracked files from tracked changes + let mut untracked_paths: Vec = Vec::new(); + let mut untracked_hunk_count = 0usize; + let mut tracked_ids: HashSet = HashSet::new(); + + for req_id in &unique_requested { + match hunk_metadata.get(*req_id) { + None => { + errors.push(format!("hunk ID not found: {req_id}")); + failed += 1; + } + Some(meta) if meta.is_untracked => { + // Discarding an untracked file = deleting it + if !untracked_paths.contains(&meta.file_path) { + untracked_paths.push(meta.file_path.clone()); + } + untracked_hunk_count += 1; + } + Some(_) => { + tracked_ids.insert(req_id.to_string()); + } + } + } + + // Delete untracked files + for path in &untracked_paths { + let full_path = repo_path.join(path); + if full_path.exists() { + std::fs::remove_file(&full_path) + .with_context(|| format!("failed to delete untracked file: {path}"))?; + } + } + discarded += untracked_hunk_count; + + // For tracked hunks, collect full hunk data and apply reverse patches + if !tracked_ids.is_empty() { + let mut opts = super::diff_opts_with_untracked(); + let diff = + repo.diff_index_to_workdir(None, Some(&mut opts)).context("failed to generate diff")?; + + let hunks = collect_hunks_from_diff(&diff)?; + + for hunk_id in &tracked_ids { + match hunks.get(hunk_id) { + None => { + errors.push(format!("hunk ID not found in current diff: {hunk_id}")); + failed += 1; + } + Some(raw) => { + let patch_str = reconstruct_reverse_patch(raw); + match Diff::from_buffer(patch_str.as_bytes()) { + Ok(patch_diff) => { + repo.apply(&patch_diff, ApplyLocation::WorkDir, None).with_context( + || format!("failed to apply reverse patch for hunk {hunk_id}"), + )?; + discarded += 1; + } + Err(e) => { + errors.push(format!( + "failed to parse reverse patch for hunk {hunk_id}: {e}" + )); + failed += 1; + } + } + } + } + } + } + + Ok(CheckoutResult { discarded, failed, errors }) +} + +/// Discard selected staged hunks (revert index → HEAD state). +pub fn checkout_staged(repo_path: &Path, request: &CheckoutRequest) -> Result { + if request.hunk_ids.is_empty() { + return Ok(CheckoutResult { + discarded: 0, + failed: 0, + errors: vec!["no hunk IDs provided".into()], + }); + } + + let repo = Repository::open(repo_path).context("failed to open git repository")?; + let hunk_metadata = scan_staged_hunk_metadata(&repo)?; + + let unique_requested: HashSet<&str> = request.hunk_ids.iter().map(String::as_str).collect(); + + let mut discarded = 0usize; + let mut failed = 0usize; + let mut errors = Vec::new(); + + // Separate newly-added files from modified files + let mut added_paths: Vec = Vec::new(); + let mut added_hunk_count = 0usize; + let mut modified_ids: HashSet = HashSet::new(); + + for req_id in &unique_requested { + match hunk_metadata.get(*req_id) { + None => { + errors.push(format!("hunk ID not found: {req_id}")); + failed += 1; + } + Some(meta) if meta.is_added => { + // Discarding a staged new file = removing from index + if !added_paths.contains(&meta.file_path) { + added_paths.push(meta.file_path.clone()); + } + added_hunk_count += 1; + } + Some(_) => { + modified_ids.insert(req_id.to_string()); + } + } + } + + // Remove newly-added files from the index + if !added_paths.is_empty() { + let mut index = repo.index().context("failed to open index")?; + for path in &added_paths { + index + .remove_path(Path::new(path)) + .with_context(|| format!("failed to remove from index: {path}"))?; + } + index.write().context("failed to write index")?; + discarded += added_hunk_count; + } + + // For modified files, collect staged hunk data and apply reverse patches to the index + if !modified_ids.is_empty() { + let head_tree = repo.head().ok().and_then(|h| h.peel_to_tree().ok()); + + let mut opts = super::diff_opts_tracked_only(); + let diff = repo + .diff_tree_to_index(head_tree.as_ref(), None, Some(&mut opts)) + .context("failed to generate staged diff")?; + + let hunks = collect_hunks_from_diff(&diff)?; + + for hunk_id in &modified_ids { + match hunks.get(hunk_id) { + None => { + errors.push(format!("hunk ID not found in staged diff: {hunk_id}")); + failed += 1; + } + Some(raw) => { + let patch_str = reconstruct_reverse_patch(raw); + match Diff::from_buffer(patch_str.as_bytes()) { + Ok(patch_diff) => { + repo.apply(&patch_diff, ApplyLocation::Index, None).with_context( + || { + format!( + "failed to apply reverse patch to index for hunk {hunk_id}" + ) + }, + )?; + discarded += 1; + } + Err(e) => { + errors.push(format!( + "failed to parse reverse patch for hunk {hunk_id}: {e}" + )); + failed += 1; + } + } + } + } + } + } + + Ok(CheckoutResult { discarded, failed, errors }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::git::diff::{diff_staged, diff_unstaged}; + use git2::Signature; + use std::fs; + use tempfile::TempDir; + + /// Create a temp repo with an initial commit containing a multi-line file. + fn setup_repo() -> (TempDir, Repository) { + let dir = TempDir::new().unwrap(); + let repo = Repository::init(dir.path()).unwrap(); + let sig = Signature::now("test", "test@test.com").unwrap(); + + fs::write(dir.path().join("hello.txt"), "line 1\nline 2\nline 3\n").unwrap(); + + { + let mut index = repo.index().unwrap(); + index.add_path(Path::new("hello.txt")).unwrap(); + index.write().unwrap(); + let tree_oid = index.write_tree().unwrap(); + let tree = repo.find_tree(tree_oid).unwrap(); + repo.commit(Some("HEAD"), &sig, &sig, "initial", &tree, &[]).unwrap(); + } + + (dir, repo) + } + + /// Create a temp repo with a file that produces 2 hunks when modified. + fn setup_two_hunk_repo() -> (TempDir, Repository) { + let dir = TempDir::new().unwrap(); + let repo = Repository::init(dir.path()).unwrap(); + let sig = Signature::now("test", "test@test.com").unwrap(); + + let lines: Vec = (1..=20).map(|i| format!("line {i}")).collect(); + fs::write(dir.path().join("file.txt"), lines.join("\n") + "\n").unwrap(); + + { + let mut index = repo.index().unwrap(); + index.add_path(Path::new("file.txt")).unwrap(); + index.write().unwrap(); + let tree_oid = index.write_tree().unwrap(); + let tree = repo.find_tree(tree_oid).unwrap(); + repo.commit(Some("HEAD"), &sig, &sig, "initial", &tree, &[]).unwrap(); + } + + let mut modified = lines; + modified[1] = "line 2 CHANGED".to_string(); + modified[18] = "line 19 CHANGED".to_string(); + fs::write(dir.path().join("file.txt"), modified.join("\n") + "\n").unwrap(); + + (dir, repo) + } + + // ===== Checkout unstaged tests ===== + + #[test] + fn checkout_single_unstaged_hunk() { + let (dir, _repo) = setup_two_hunk_repo(); + + let output = diff_unstaged(dir.path(), None).unwrap(); + assert!(output.total_hunks >= 2); + let first_id = output.files[0].hunks[0].id.clone(); + + let request = CheckoutRequest { hunk_ids: vec![first_id] }; + let result = checkout_unstaged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 1); + assert_eq!(result.failed, 0); + + // Should still have remaining hunks + let remaining = diff_unstaged(dir.path(), None).unwrap(); + assert!(remaining.total_hunks >= 1); + } + + #[test] + fn checkout_all_unstaged_hunks_restores_file() { + let (dir, _repo) = setup_two_hunk_repo(); + + let output = diff_unstaged(dir.path(), None).unwrap(); + let all_ids: Vec = output.files[0].hunks.iter().map(|h| h.id.clone()).collect(); + + let request = CheckoutRequest { hunk_ids: all_ids }; + let result = checkout_unstaged(dir.path(), &request).unwrap(); + assert!(result.discarded >= 2); + assert_eq!(result.failed, 0); + + // No more unstaged changes + let remaining = diff_unstaged(dir.path(), Some("file.txt")).unwrap(); + assert_eq!(remaining.total_hunks, 0); + } + + #[test] + fn checkout_unstaged_invalid_id() { + let (dir, _repo) = setup_two_hunk_repo(); + + let request = CheckoutRequest { hunk_ids: vec!["nonexistent".into()] }; + let result = checkout_unstaged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 0); + assert_eq!(result.failed, 1); + assert!(result.errors[0].contains("not found")); + } + + #[test] + fn checkout_unstaged_empty_request() { + let (dir, _repo) = setup_two_hunk_repo(); + + let request = CheckoutRequest { hunk_ids: vec![] }; + let result = checkout_unstaged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 0); + assert!(result.errors[0].contains("no hunk IDs")); + } + + #[test] + fn checkout_unstaged_untracked_file_deletes_it() { + let (dir, _repo) = setup_repo(); + + fs::write(dir.path().join("untracked.txt"), "new stuff\n").unwrap(); + + let output = diff_unstaged(dir.path(), None).unwrap(); + let untracked = output.files.iter().find(|f| f.path == "untracked.txt").unwrap(); + let hunk_id = untracked.hunks[0].id.clone(); + + let request = CheckoutRequest { hunk_ids: vec![hunk_id] }; + let result = checkout_unstaged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 1); + assert_eq!(result.failed, 0); + + // File should be deleted + assert!(!dir.path().join("untracked.txt").exists()); + } + + #[test] + fn checkout_unstaged_modified_restores_content() { + let (dir, _repo) = setup_repo(); + + let original = fs::read_to_string(dir.path().join("hello.txt")).unwrap(); + fs::write(dir.path().join("hello.txt"), "completely changed\n").unwrap(); + + let output = diff_unstaged(dir.path(), None).unwrap(); + let hunk_id = output.files[0].hunks[0].id.clone(); + + let request = CheckoutRequest { hunk_ids: vec![hunk_id] }; + let result = checkout_unstaged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 1); + + let restored = fs::read_to_string(dir.path().join("hello.txt")).unwrap(); + assert_eq!(restored, original); + } + + // ===== Checkout staged tests ===== + + #[test] + fn checkout_staged_single_hunk() { + let (dir, repo) = setup_repo(); + + // Modify and stage + fs::write(dir.path().join("hello.txt"), "line 1\nline 2 STAGED\nline 3\n").unwrap(); + { + let mut index = repo.index().unwrap(); + index.add_path(Path::new("hello.txt")).unwrap(); + index.write().unwrap(); + } + + let output = diff_staged(dir.path(), None).unwrap(); + assert!(output.total_hunks >= 1); + let hunk_id = output.files[0].hunks[0].id.clone(); + + let request = CheckoutRequest { hunk_ids: vec![hunk_id] }; + let result = checkout_staged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 1); + assert_eq!(result.failed, 0); + + // No more staged changes + let remaining = diff_staged(dir.path(), None).unwrap(); + assert_eq!(remaining.total_hunks, 0); + } + + #[test] + fn checkout_staged_new_file_removes_from_index() { + let (dir, repo) = setup_repo(); + + fs::write(dir.path().join("new_file.txt"), "brand new\n").unwrap(); + { + let mut index = repo.index().unwrap(); + index.add_path(Path::new("new_file.txt")).unwrap(); + index.write().unwrap(); + } + + let output = diff_staged(dir.path(), None).unwrap(); + let new_file_hunk = + output.files.iter().find(|f| f.path == "new_file.txt").unwrap().hunks[0].id.clone(); + + let request = CheckoutRequest { hunk_ids: vec![new_file_hunk] }; + let result = checkout_staged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 1); + + // File should no longer be in the staged diff + let remaining = diff_staged(dir.path(), None).unwrap(); + assert!(remaining.files.iter().all(|f| f.path != "new_file.txt")); + + // File should still exist on disk (just unstaged now) + assert!(dir.path().join("new_file.txt").exists()); + } + + #[test] + fn checkout_staged_invalid_id() { + let (dir, _repo) = setup_repo(); + + let request = CheckoutRequest { hunk_ids: vec!["nonexistent".into()] }; + let result = checkout_staged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 0); + assert_eq!(result.failed, 1); + assert!(result.errors[0].contains("not found")); + } + + #[test] + fn checkout_staged_empty_request() { + let (dir, _repo) = setup_repo(); + + let request = CheckoutRequest { hunk_ids: vec![] }; + let result = checkout_staged(dir.path(), &request).unwrap(); + assert_eq!(result.discarded, 0); + assert!(result.errors[0].contains("no hunk IDs")); + } +} diff --git a/src/git/diff.rs b/src/git/diff.rs index bed5cf2..9860c81 100644 --- a/src/git/diff.rs +++ b/src/git/diff.rs @@ -151,6 +151,94 @@ pub fn diff_unstaged(repo_path: &Path, file_filter: Option<&str>) -> Result) -> Result { + let repo = Repository::open(repo_path).context("failed to open git repository")?; + + let mut opts = super::diff_opts_tracked_only(); + if let Some(filter) = file_filter { + opts.pathspec(filter); + } + + // Get HEAD tree; if no commits yet, use an empty tree + let head_tree = repo.head().ok().and_then(|h| h.peel_to_tree().ok()); + + let diff = repo + .diff_tree_to_index(head_tree.as_ref(), None, Some(&mut opts)) + .context("failed to generate staged diff")?; + + let state = RefCell::new(DiffState::new()); + + diff.foreach( + &mut |delta, _progress| { + let mut s = state.borrow_mut(); + s.flush_file(); + + if is_binary_delta(&delta) { + return true; + } + + let path = delta_path(&delta); + + s.current_file = Some(FileChange { + path, + status: delta_to_status(delta.status()), + hunks: Vec::new(), + }); + true + }, + None, + Some(&mut |_delta, hunk| { + let mut s = state.borrow_mut(); + s.flush_hunk(); + + let file_path = s.current_file.as_ref().map_or("", |f| f.path.as_str()); + + let header = hunk_header(&hunk); + + s.current_hunk = Some(Hunk { + id: hunk_id(file_path, hunk.old_start(), &header), + file_path: file_path.to_string(), + old_start: hunk.old_start(), + old_lines: hunk.old_lines(), + new_start: hunk.new_start(), + new_lines: hunk.new_lines(), + header, + lines: Vec::new(), + }); + true + }), + Some(&mut |_delta, _hunk, line| { + let mut s = state.borrow_mut(); + + let tag = LineTag::from_origin(line.origin()); + + let content = String::from_utf8_lossy(line.content()).into_owned(); + + let hunk_line = HunkLine { + tag, + content, + old_lineno: line.old_lineno(), + new_lineno: line.new_lineno(), + }; + + if let Some(hunk) = s.current_hunk.as_mut() { + hunk.lines.push(hunk_line); + } + true + }), + ) + .context("failed to iterate staged diff")?; + + let files = state.into_inner().finalize(); + let total_hunks = files.iter().map(|f| f.hunks.len()).sum(); + + Ok(DiffOutput { files, total_hunks }) +} + #[cfg(test)] mod tests { use super::*; @@ -342,4 +430,72 @@ mod tests { assert!(inserted.old_lineno.is_none(), "insert has no old line number"); assert!(inserted.new_lineno.is_some(), "insert has new line number"); } + + // --- diff_staged tests --- + + #[test] + fn staged_diff_shows_staged_changes() { + let (dir, repo) = setup_repo(); + + // Modify and stage + fs::write(dir.path().join("hello.txt"), "line 1\nline 2 STAGED\nline 3\n").unwrap(); + { + let mut index = repo.index().unwrap(); + index.add_path(Path::new("hello.txt")).unwrap(); + index.write().unwrap(); + } + + let output = diff_staged(dir.path(), None).unwrap(); + assert_eq!(output.files.len(), 1); + assert_eq!(output.files[0].path, "hello.txt"); + assert_eq!(output.files[0].status, FileStatus::Modified); + assert!(output.total_hunks >= 1); + } + + #[test] + fn staged_diff_empty_when_nothing_staged() { + let (dir, _repo) = setup_repo(); + + // Modify but don't stage + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + + let output = diff_staged(dir.path(), None).unwrap(); + assert_eq!(output.files.len(), 0); + assert_eq!(output.total_hunks, 0); + } + + #[test] + fn staged_diff_with_file_filter() { + let (dir, repo) = setup_repo(); + + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + fs::write(dir.path().join("other.txt"), "new\n").unwrap(); + { + let mut index = repo.index().unwrap(); + index.add_path(Path::new("hello.txt")).unwrap(); + index.add_path(Path::new("other.txt")).unwrap(); + index.write().unwrap(); + } + + let output = diff_staged(dir.path(), Some("hello.txt")).unwrap(); + assert_eq!(output.files.len(), 1); + assert_eq!(output.files[0].path, "hello.txt"); + } + + #[test] + fn staged_diff_new_file() { + let (dir, repo) = setup_repo(); + + fs::write(dir.path().join("brand_new.txt"), "new content\n").unwrap(); + { + let mut index = repo.index().unwrap(); + index.add_path(Path::new("brand_new.txt")).unwrap(); + index.write().unwrap(); + } + + let output = diff_staged(dir.path(), None).unwrap(); + let new_file = output.files.iter().find(|f| f.path == "brand_new.txt"); + assert!(new_file.is_some()); + assert_eq!(new_file.unwrap().status, FileStatus::Added); + } } diff --git a/src/git/mod.rs b/src/git/mod.rs index dcab348..b48d054 100644 --- a/src/git/mod.rs +++ b/src/git/mod.rs @@ -1,5 +1,6 @@ use git2::DiffOptions; +pub mod checkout; pub mod diff; pub mod stage; pub mod status; diff --git a/src/main.rs b/src/main.rs index e472efd..585d6fd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,8 +13,13 @@ fn main() -> Result<()> { let cli = Cli::parse(); match &cli.command { - Commands::Diff { file } => { - let diff = git::diff::diff_unstaged(&cli.repo, file.as_ref().and_then(|f| f.to_str()))?; + Commands::Diff { file, staged } => { + let file_filter = file.as_ref().and_then(|f| f.to_str()); + let diff = if *staged { + git::diff::diff_staged(&cli.repo, file_filter)? + } else { + git::diff::diff_unstaged(&cli.repo, file_filter)? + }; output::print_diff(&diff, &cli.format); } Commands::Stage { hunk_ids, from_stdin } => { @@ -29,6 +34,19 @@ fn main() -> Result<()> { let result = git::stage::stage_selection(&cli.repo, &request)?; output::print_stage_result(&result, &cli.format); } + Commands::Checkout { hunk_ids, from_stdin, staged } => { + let request = if *from_stdin { + serde_json::from_reader(std::io::stdin())? + } else { + models::CheckoutRequest { hunk_ids: hunk_ids.clone().unwrap_or_default() } + }; + let result = if *staged { + git::checkout::checkout_staged(&cli.repo, &request)? + } else { + git::checkout::checkout_unstaged(&cli.repo, &request)? + }; + output::print_checkout_result(&result, &cli.format); + } Commands::Status => { let status = git::status::get_status(&cli.repo)?; output::print_status(&status, &cli.format); diff --git a/src/models.rs b/src/models.rs index 909a05e..a12fabd 100644 --- a/src/models.rs +++ b/src/models.rs @@ -106,6 +106,23 @@ pub struct StageResult { pub errors: Vec, } +/// Input for `gitsift checkout`. +#[derive(Debug, PartialEq, Serialize, Deserialize)] +pub struct CheckoutRequest { + /// Discard entire hunks by ID. + #[serde(default)] + pub hunk_ids: Vec, +} + +/// Result of a checkout (discard) operation. +#[derive(Debug, PartialEq, Serialize, Deserialize)] +pub struct CheckoutResult { + pub discarded: usize, + pub failed: usize, + #[serde(skip_serializing_if = "Vec::is_empty", default)] + pub errors: Vec, +} + /// Protocol response version. Increment on breaking schema changes. pub const PROTOCOL_VERSION: u8 = 1; @@ -143,12 +160,28 @@ pub enum ProtocolRequest { Stage { params: StageRequest, }, + Checkout { + params: CheckoutParams, + }, Status, } #[derive(Debug, Default, PartialEq, Serialize, Deserialize)] pub struct DiffParams { pub file: Option, + /// If true, show staged changes (HEAD vs index) instead of unstaged. + #[serde(default)] + pub staged: bool, +} + +/// Parameters for the checkout protocol method. +#[derive(Debug, PartialEq, Serialize, Deserialize)] +pub struct CheckoutParams { + #[serde(default)] + pub hunk_ids: Vec, + /// If true, discard staged changes (index → HEAD). Default: discard unstaged (workdir → index). + #[serde(default)] + pub staged: bool, } /// Staging status summary. @@ -431,13 +464,44 @@ mod tests { assert!(result.is_err()); } + #[test] + fn protocol_request_checkout_parse() { + let json = r#"{"method": "checkout", "params": {"hunk_ids": ["a1"], "staged": true}}"#; + let req: ProtocolRequest = serde_json::from_str(json).unwrap(); + match req { + ProtocolRequest::Checkout { params } => { + assert_eq!(params.hunk_ids, vec!["a1"]); + assert!(params.staged); + } + _ => panic!("expected Checkout variant"), + } + } + + #[test] + fn protocol_request_diff_staged_parse() { + let json = r#"{"method": "diff", "params": {"staged": true}}"#; + let req: ProtocolRequest = serde_json::from_str(json).unwrap(); + match req { + ProtocolRequest::Diff { params } => { + assert!(params.staged); + assert_eq!(params.file, None); + } + _ => panic!("expected Diff variant"), + } + } + #[test] fn protocol_request_roundtrip() { let requests = vec![ - ProtocolRequest::Diff { params: DiffParams { file: Some("test.rs".into()) } }, + ProtocolRequest::Diff { + params: DiffParams { file: Some("test.rs".into()), staged: false }, + }, ProtocolRequest::Stage { params: StageRequest { hunk_ids: vec!["id1".into()], line_selections: vec![] }, }, + ProtocolRequest::Checkout { + params: CheckoutParams { hunk_ids: vec!["id2".into()], staged: false }, + }, ProtocolRequest::Status, ]; for req in &requests { @@ -446,4 +510,29 @@ mod tests { assert_eq!(*req, back); } } + + // --- CheckoutRequest round-trip --- + + #[test] + fn checkout_request_roundtrip() { + let req = CheckoutRequest { hunk_ids: vec!["abc".into(), "def".into()] }; + let json = serde_json::to_string(&req).unwrap(); + let back: CheckoutRequest = serde_json::from_str(&json).unwrap(); + assert_eq!(req, back); + } + + #[test] + fn checkout_result_roundtrip() { + let result = CheckoutResult { discarded: 2, failed: 1, errors: vec!["not found".into()] }; + let json = serde_json::to_string(&result).unwrap(); + let back: CheckoutResult = serde_json::from_str(&json).unwrap(); + assert_eq!(result, back); + } + + #[test] + fn checkout_result_no_errors_omits_field() { + let result = CheckoutResult { discarded: 1, failed: 0, errors: vec![] }; + let json = serde_json::to_string(&result).unwrap(); + assert!(!json.contains("errors")); + } } diff --git a/src/output.rs b/src/output.rs index 39c5cbe..c68fb92 100644 --- a/src/output.rs +++ b/src/output.rs @@ -1,5 +1,5 @@ use crate::cli::OutputFormat; -use crate::models::{DiffOutput, Response, StageResult, StatusSummary}; +use crate::models::{CheckoutResult, DiffOutput, Response, StageResult, StatusSummary}; /// Print a value as a JSON `Response::success` envelope. fn print_json(data: &impl serde::Serialize) { @@ -23,6 +23,14 @@ pub fn print_stage_result(result: &StageResult, format: &OutputFormat) { } } +/// Print checkout result. +pub fn print_checkout_result(result: &CheckoutResult, format: &OutputFormat) { + match format { + OutputFormat::Json => print_json(result), + OutputFormat::Toon => print!("{}", crate::toon::format_checkout_result(result)), + } +} + /// Print status summary. pub fn print_status(status: &StatusSummary, format: &OutputFormat) { match format { diff --git a/src/protocol.rs b/src/protocol.rs index 8bdb703..a079d7c 100644 --- a/src/protocol.rs +++ b/src/protocol.rs @@ -100,11 +100,23 @@ pub fn run_protocol(repo_path: &Path) -> Result<()> { let write_ok = match request { ProtocolRequest::Diff { params } => { - dispatch(&mut out, git::diff::diff_unstaged(repo_path, params.file.as_deref())) + if params.staged { + dispatch(&mut out, git::diff::diff_staged(repo_path, params.file.as_deref())) + } else { + dispatch(&mut out, git::diff::diff_unstaged(repo_path, params.file.as_deref())) + } } ProtocolRequest::Stage { params } => { dispatch(&mut out, git::stage::stage_selection(repo_path, ¶ms)) } + ProtocolRequest::Checkout { params } => { + let request = crate::models::CheckoutRequest { hunk_ids: params.hunk_ids.clone() }; + if params.staged { + dispatch(&mut out, git::checkout::checkout_staged(repo_path, &request)) + } else { + dispatch(&mut out, git::checkout::checkout_unstaged(repo_path, &request)) + } + } ProtocolRequest::Status => dispatch(&mut out, git::status::get_status(repo_path)), }; diff --git a/src/toon.rs b/src/toon.rs index 1d9e221..2f54ce1 100644 --- a/src/toon.rs +++ b/src/toon.rs @@ -71,6 +71,23 @@ pub fn format_diff(output: &DiffOutput) -> String { buf } +/// Format a checkout result response in compact format. +pub fn format_checkout_result(result: &crate::models::CheckoutResult) -> String { + let resp = Response::success(result); + let mut buf = String::new(); + writeln!(buf, "version: {}", resp.version).unwrap(); + writeln!(buf, "ok: true").unwrap(); + writeln!(buf, "discarded: {}", result.discarded).unwrap(); + writeln!(buf, "failed: {}", result.failed).unwrap(); + if !result.errors.is_empty() { + writeln!(buf, "errors[{}]:", result.errors.len()).unwrap(); + for err in &result.errors { + writeln!(buf, " - {err}").unwrap(); + } + } + buf +} + /// Format a stage result response in compact format. pub fn format_stage_result(result: &StageResult) -> String { let resp = Response::success(result); diff --git a/tests/cli.rs b/tests/cli.rs index 79ead6a..58d7cd5 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -142,6 +142,7 @@ fn help_shows_subcommands() { let stdout = String::from_utf8(output.stdout).unwrap(); assert!(stdout.contains("diff")); assert!(stdout.contains("stage")); + assert!(stdout.contains("checkout")); assert!(stdout.contains("status")); assert!(stdout.contains("protocol")); } @@ -286,3 +287,206 @@ fn json_format_preserves_context_lines() { let has_equal = lines.iter().any(|l| l["tag"] == "equal"); assert!(has_equal, "JSON format should preserve context lines"); } + +// ===== checkout subcommand ===== + +#[test] +fn checkout_unstaged_hunk_by_id() { + let dir = setup_repo(); + let original = fs::read_to_string(dir.path().join("hello.txt")).unwrap(); + fs::write(dir.path().join("hello.txt"), "completely changed\n").unwrap(); + + // Get hunk ID + let diff_out = + gitsift().args(["diff", "--format", "json", "--repo"]).arg(dir.path()).output().unwrap(); + let diff_val = parse_json(&diff_out.stdout); + let hunk_id = diff_val["data"]["files"][0]["hunks"][0]["id"].as_str().unwrap(); + + // Checkout (discard) + let checkout_out = gitsift() + .args(["checkout", "--hunk-ids", hunk_id, "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + + assert!(checkout_out.status.success()); + let val = parse_json(&checkout_out.stdout); + assert_eq!(val["ok"], true); + assert_eq!(val["data"]["discarded"], 1); + assert_eq!(val["data"]["failed"], 0); + + // File should be restored + let restored = fs::read_to_string(dir.path().join("hello.txt")).unwrap(); + assert_eq!(restored, original); +} + +#[test] +fn checkout_unstaged_invalid_id() { + let dir = setup_repo(); + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + + let output = gitsift() + .args(["checkout", "--hunk-ids", "bogus", "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + + assert!(output.status.success()); + let val = parse_json(&output.stdout); + assert_eq!(val["data"]["discarded"], 0); + assert_eq!(val["data"]["failed"], 1); +} + +#[test] +fn checkout_unstaged_toon_output() { + let dir = setup_repo(); + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + + let diff_out = + gitsift().args(["diff", "--format", "json", "--repo"]).arg(dir.path()).output().unwrap(); + let diff_val = parse_json(&diff_out.stdout); + let hunk_id = diff_val["data"]["files"][0]["hunks"][0]["id"].as_str().unwrap(); + + let output = gitsift() + .args(["checkout", "--hunk-ids", hunk_id, "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + + assert!(output.status.success()); + let out = stdout_str(&output.stdout); + assert!(out.contains("ok: true")); + assert!(out.contains("discarded: 1")); + assert!(out.contains("failed: 0")); +} + +#[test] +fn checkout_staged_hunk() { + let dir = setup_repo(); + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + + // Stage the change first + let diff_out = + gitsift().args(["diff", "--format", "json", "--repo"]).arg(dir.path()).output().unwrap(); + let diff_val = parse_json(&diff_out.stdout); + let hunk_id = diff_val["data"]["files"][0]["hunks"][0]["id"].as_str().unwrap(); + + gitsift() + .args(["stage", "--hunk-ids", hunk_id, "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + + // Get staged hunk ID + let staged_diff = gitsift() + .args(["diff", "--staged", "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + let staged_val = parse_json(&staged_diff.stdout); + assert!(staged_val["data"]["total_hunks"].as_u64().unwrap() > 0); + let staged_hunk_id = staged_val["data"]["files"][0]["hunks"][0]["id"].as_str().unwrap(); + + // Checkout staged + let checkout_out = gitsift() + .args(["checkout", "--staged", "--hunk-ids", staged_hunk_id, "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + + assert!(checkout_out.status.success()); + let val = parse_json(&checkout_out.stdout); + assert_eq!(val["data"]["discarded"], 1); + + // No more staged changes + let status_out = + gitsift().args(["status", "--format", "json", "--repo"]).arg(dir.path()).output().unwrap(); + let status_val = parse_json(&status_out.stdout); + assert_eq!(status_val["data"]["staged_hunks"], 0); +} + +// ===== diff --staged subcommand ===== + +#[test] +fn diff_staged_json_output() { + let dir = setup_repo(); + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + + // Stage it + let diff_out = + gitsift().args(["diff", "--format", "json", "--repo"]).arg(dir.path()).output().unwrap(); + let diff_val = parse_json(&diff_out.stdout); + let hunk_id = diff_val["data"]["files"][0]["hunks"][0]["id"].as_str().unwrap(); + + gitsift() + .args(["stage", "--hunk-ids", hunk_id, "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + + // diff --staged should show the staged change + let staged_out = gitsift() + .args(["diff", "--staged", "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + + assert!(staged_out.status.success()); + let val = parse_json(&staged_out.stdout); + assert_eq!(val["ok"], true); + assert!(val["data"]["total_hunks"].as_u64().unwrap() > 0); +} + +#[test] +fn diff_staged_empty_when_nothing_staged() { + let dir = setup_repo(); + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + + let output = gitsift() + .args(["diff", "--staged", "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + + assert!(output.status.success()); + let val = parse_json(&output.stdout); + assert_eq!(val["data"]["total_hunks"], 0); +} + +// ===== full checkout workflow E2E ===== + +#[test] +fn full_workflow_diff_checkout_verify() { + let dir = setup_repo(); + let original = fs::read_to_string(dir.path().join("hello.txt")).unwrap(); + fs::write(dir.path().join("hello.txt"), "line 1\nchanged\nline 3\n").unwrap(); + + // 1. Diff + let diff_out = + gitsift().args(["diff", "--format", "json", "--repo"]).arg(dir.path()).output().unwrap(); + let diff_val = parse_json(&diff_out.stdout); + let hunk_id = diff_val["data"]["files"][0]["hunks"][0]["id"].as_str().unwrap(); + + // 2. Checkout + let checkout_out = gitsift() + .args(["checkout", "--hunk-ids", hunk_id, "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + assert!(checkout_out.status.success()); + let val = parse_json(&checkout_out.stdout); + assert_eq!(val["data"]["discarded"], 1); + + // 3. Diff again — should be clean + let diff2_out = gitsift() + .args(["diff", "--format", "json", "--file", "hello.txt", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + let diff2_val = parse_json(&diff2_out.stdout); + assert_eq!(diff2_val["data"]["total_hunks"], 0); + + // 4. File matches original + let restored = fs::read_to_string(dir.path().join("hello.txt")).unwrap(); + assert_eq!(restored, original); +} diff --git a/tests/edge_cases.rs b/tests/edge_cases.rs index 779fbf3..4c20506 100644 --- a/tests/edge_cases.rs +++ b/tests/edge_cases.rs @@ -299,3 +299,90 @@ fn clean_repo_diff_returns_empty() { assert_eq!(val["data"]["total_hunks"], 0); assert_eq!(val["data"]["files"].as_array().unwrap().len(), 0); } + +// ===== Checkout edge cases ===== + +#[test] +fn checkout_untracked_file_deletes_it() { + let dir = setup_repo(); + fs::write(dir.path().join("untracked.txt"), "new stuff\n").unwrap(); + + let diff_out = + gitsift().args(["diff", "--format", "json", "--repo"]).arg(dir.path()).output().unwrap(); + let diff_val = parse_json(&diff_out.stdout); + let untracked = diff_val["data"]["files"] + .as_array() + .unwrap() + .iter() + .find(|f| f["path"] == "untracked.txt") + .unwrap(); + let hunk_id = untracked["hunks"][0]["id"].as_str().unwrap(); + + let checkout_out = gitsift() + .args(["checkout", "--hunk-ids", hunk_id, "--format", "json", "--repo"]) + .arg(dir.path()) + .output() + .unwrap(); + assert!(checkout_out.status.success()); + let val = parse_json(&checkout_out.stdout); + assert_eq!(val["data"]["discarded"], 1); + + assert!(!dir.path().join("untracked.txt").exists()); +} + +#[test] +fn checkout_from_stdin_json() { + let dir = setup_repo(); + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + + let diff_out = + gitsift().args(["diff", "--format", "json", "--repo"]).arg(dir.path()).output().unwrap(); + let diff_val = parse_json(&diff_out.stdout); + let hunk_id = diff_val["data"]["files"][0]["hunks"][0]["id"].as_str().unwrap(); + + let stdin_json = format!(r#"{{"hunk_ids": ["{hunk_id}"]}}"#); + let checkout_out = gitsift() + .args(["checkout", "--from-stdin", "--format", "json", "--repo"]) + .arg(dir.path()) + .write_stdin(stdin_json) + .output() + .unwrap(); + + assert!(checkout_out.status.success()); + let val = parse_json(&checkout_out.stdout); + assert_eq!(val["data"]["discarded"], 1); +} + +#[test] +fn protocol_checkout_method() { + let dir = setup_repo(); + fs::write(dir.path().join("hello.txt"), "changed\n").unwrap(); + + // Get hunk ID via protocol diff + let diff_out = gitsift() + .args(["protocol", "--repo"]) + .arg(dir.path()) + .write_stdin("{\"method\": \"diff\"}\n") + .output() + .unwrap(); + let diff_resp: serde_json::Value = + serde_json::from_str(String::from_utf8(diff_out.stdout).unwrap().lines().next().unwrap()) + .unwrap(); + let hunk_id = diff_resp["data"]["files"][0]["hunks"][0]["id"].as_str().unwrap(); + + // Checkout via protocol + let checkout_stdin = + format!("{{\"method\": \"checkout\", \"params\": {{\"hunk_ids\": [\"{hunk_id}\"]}}}}\n"); + let checkout_out = gitsift() + .args(["protocol", "--repo"]) + .arg(dir.path()) + .write_stdin(checkout_stdin) + .output() + .unwrap(); + let checkout_resp: serde_json::Value = serde_json::from_str( + String::from_utf8(checkout_out.stdout).unwrap().lines().next().unwrap(), + ) + .unwrap(); + assert_eq!(checkout_resp["ok"], true); + assert_eq!(checkout_resp["data"]["discarded"], 1); +} From b26089c8ccff0b38b06ee513e8f71daff3063617 Mon Sep 17 00:00:00 2001 From: MonteYin Date: Mon, 30 Mar 2026 17:10:43 +0800 Subject: [PATCH 2/2] fix: atomic batch apply and path traversal guard in checkout - Concatenate all reverse patches into a single string and apply atomically via one repo.apply() call, preventing failures when hunks are close together (both unstaged and staged paths). - Add canonicalize + starts_with guard before deleting untracked files to prevent path traversal outside the repository boundary. --- src/git/checkout.rs | 81 +++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 40 deletions(-) diff --git a/src/git/checkout.rs b/src/git/checkout.rs index ba6e7ef..cbd8526 100644 --- a/src/git/checkout.rs +++ b/src/git/checkout.rs @@ -264,17 +264,24 @@ pub fn checkout_unstaged(repo_path: &Path, request: &CheckoutRequest) -> Result< } } - // Delete untracked files + // Delete untracked files (with path traversal guard) + let repo_root = repo_path.canonicalize().context("failed to canonicalize repo path")?; for path in &untracked_paths { let full_path = repo_path.join(path); - if full_path.exists() { - std::fs::remove_file(&full_path) - .with_context(|| format!("failed to delete untracked file: {path}"))?; + let canonical = + full_path.canonicalize().with_context(|| format!("failed to resolve path: {path}"))?; + if !canonical.starts_with(&repo_root) { + errors.push(format!("path escapes repository boundary: {path}")); + failed += 1; + untracked_hunk_count -= 1; + continue; } + std::fs::remove_file(&canonical) + .with_context(|| format!("failed to delete untracked file: {path}"))?; } discarded += untracked_hunk_count; - // For tracked hunks, collect full hunk data and apply reverse patches + // For tracked hunks, collect full hunk data and apply all reverse patches atomically if !tracked_ids.is_empty() { let mut opts = super::diff_opts_with_untracked(); let diff = @@ -282,6 +289,10 @@ pub fn checkout_unstaged(repo_path: &Path, request: &CheckoutRequest) -> Result< let hunks = collect_hunks_from_diff(&diff)?; + // Build a single combined patch from all valid hunks + let mut combined_patch = String::new(); + let mut valid_count = 0usize; + for hunk_id in &tracked_ids { match hunks.get(hunk_id) { None => { @@ -289,24 +300,19 @@ pub fn checkout_unstaged(repo_path: &Path, request: &CheckoutRequest) -> Result< failed += 1; } Some(raw) => { - let patch_str = reconstruct_reverse_patch(raw); - match Diff::from_buffer(patch_str.as_bytes()) { - Ok(patch_diff) => { - repo.apply(&patch_diff, ApplyLocation::WorkDir, None).with_context( - || format!("failed to apply reverse patch for hunk {hunk_id}"), - )?; - discarded += 1; - } - Err(e) => { - errors.push(format!( - "failed to parse reverse patch for hunk {hunk_id}: {e}" - )); - failed += 1; - } - } + combined_patch.push_str(&reconstruct_reverse_patch(raw)); + valid_count += 1; } } } + + if valid_count > 0 { + let patch_diff = Diff::from_buffer(combined_patch.as_bytes()) + .context("failed to parse combined reverse patch")?; + repo.apply(&patch_diff, ApplyLocation::WorkDir, None) + .context("failed to apply reverse patches to working tree")?; + discarded += valid_count; + } } Ok(CheckoutResult { discarded, failed, errors }) @@ -367,7 +373,7 @@ pub fn checkout_staged(repo_path: &Path, request: &CheckoutRequest) -> Result Result { @@ -385,28 +395,19 @@ pub fn checkout_staged(repo_path: &Path, request: &CheckoutRequest) -> Result { - let patch_str = reconstruct_reverse_patch(raw); - match Diff::from_buffer(patch_str.as_bytes()) { - Ok(patch_diff) => { - repo.apply(&patch_diff, ApplyLocation::Index, None).with_context( - || { - format!( - "failed to apply reverse patch to index for hunk {hunk_id}" - ) - }, - )?; - discarded += 1; - } - Err(e) => { - errors.push(format!( - "failed to parse reverse patch for hunk {hunk_id}: {e}" - )); - failed += 1; - } - } + combined_patch.push_str(&reconstruct_reverse_patch(raw)); + valid_count += 1; } } } + + if valid_count > 0 { + let patch_diff = Diff::from_buffer(combined_patch.as_bytes()) + .context("failed to parse combined reverse patch")?; + repo.apply(&patch_diff, ApplyLocation::Index, None) + .context("failed to apply reverse patches to index")?; + discarded += valid_count; + } } Ok(CheckoutResult { discarded, failed, errors })