diff --git a/CHANGELOG.md b/CHANGELOG.md index 647acb6..7e151d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ This project follows semantic versioning. ## Unreleased +## v0.7.0 - 2026-05-25 + ### Added - Setup executor preflight phase and `environment.yaml` `setup.commands[]`. Galley now prepares fresh task worktrees before acceptance skeleton creation and implementation by running authored setup commands or dispatching a setup executor to learn a reusable setup plan, which is persisted back to `environment.yaml` on success. @@ -14,6 +16,11 @@ This project follows semantic versioning. ### Changed - Packaged Claude and Codex Galley plugins are now versioned as `0.1.13`: setup executor prompts now include explicit result JSON contracts and troubleshooting guidance routes `setup_failed` diagnosis through setup run evidence. +- The daemon now stages the executor-produced reviewable path set before capturing supervisor diff evidence, so newly created untracked files appear in `diff.patch` and supervisor review while `commit:false` input files and unrelated context-only worktree dirtiness stay out of the submitted diff. Review staging writes `git_add_review.stdout.log`, `git_add_review.stderr.log`, and `git_add_review_result.json` attempt evidence when it runs, and records a skipped result when there is no reviewable path set. + +### Fixed + +- Review-time staging failures are now recorded as attempt errors with `phase=review_staging` / `kind=review_staging_failed`; Galley does not invoke the supervisor with an empty or stale diff and moves the task to `tasks/failed` with staging-specific evidence for operator diagnosis. ## v0.6.2 - 2026-05-25 diff --git a/internal/daemon/loop.go b/internal/daemon/loop.go index a5d57e7..42e740a 100644 --- a/internal/daemon/loop.go +++ b/internal/daemon/loop.go @@ -18,9 +18,74 @@ import ( "github.com/shinpr/galley/internal/supervisor" "github.com/shinpr/galley/internal/task" "github.com/shinpr/galley/internal/taskstate" + "github.com/shinpr/galley/internal/vcs" "github.com/shinpr/galley/internal/workspace" ) +// reviewStagingError signals that Galley's review-time `git add -A` step +// failed after the executor exited and before the supervisor evaluation +// would have been driven against an empty or stale diff. The loop +// classification path (runOneSupervisorAttempt) inspects this type to record +// the failure with a distinct `review_staging` phase / `review_staging_failed` +// kind instead of the generic executor failure classification (AC6). +type reviewStagingError struct{ Err error } + +func (e *reviewStagingError) Error() string { + if e == nil || e.Err == nil { + return "review staging failed" + } + return e.Err.Error() +} + +func (e *reviewStagingError) Unwrap() error { + if e == nil { + return nil + } + return e.Err +} + +func asReviewStagingError(err error) (*reviewStagingError, bool) { + var rse *reviewStagingError + if errors.As(err, &rse) { + return rse, true + } + return nil, false +} + +// stageExecutorOutput is the package-level seam used by runExecutorAttempt to +// stage the executor-produced worktree changes Galley hands to the +// supervisor. +// +// The implementation runs in three steps: +// +// 1. Discover the dirty worktree paths via `git status --porcelain=v1 -z` +// after the executor returned. +// 2. Build the explicit reviewable path set with reviewablePathsFromStatus. +// The builder drops empty/non-local entries, deduplicates, and excludes +// the supplied excludePaths (task.files entries declared with +// commit:false). Forbidden-path entries are intentionally kept in the +// set so the existing finalize-time forbidden_paths gate still observes +// them. +// 3. Stage exactly that explicit path set with vcs.StagePathsForReview. The +// staging command runs `git add -A -- [...]` so the diff +// fields the supervisor reviews (StagedDiff / Diff in the snapshot) +// reflect the executor's submitted artifact and nothing else. +// +// Tests override this seam to inject deterministic failures and to assert +// the exclude-list contract without spawning a real git process; production +// callers always go through the three-step flow above. The signature keeps +// excludePaths visible at the seam so failure-path tests can document the +// shape of the contract. +var stageExecutorOutput = func(ctx context.Context, opts Options, workDir, attemptDir string, excludePaths []string) error { + bins := vcsBinaries(opts) + statusZ, err := vcs.StatusPorcelainZ(ctx, bins, workDir) + if err != nil { + return err + } + reviewable := reviewablePathsFromStatus(statusZ, excludePaths) + return vcs.StagePathsForReview(ctx, bins, workDir, attemptDir, reviewable) +} + const progressNoDiffThreshold = 2 // supervisorRetryBudget is the internal, fixed number of additional supervisor @@ -208,6 +273,14 @@ func runOneSupervisorAttempt(ctx context.Context, req supervisorAttemptRequest) } outcome, err := runExecutorAttempt(ctx, req.Opts, effectiveTask, req.Profiles, req.Prepared.CWD, req.Prepared.BaseSHA, attemptDir, req.Prompt, effectiveTaskPath, preflightOutputs) if err != nil { + // A review-time staging failure is recorded under a distinct phase + // and kind so the failed task surfaces the staging-related error to + // the supervisor and operators instead of mis-classifying it as an + // executor failure (AC6). + if _, ok := asReviewStagingError(err); ok { + appendFailureAttempt(req.Loaded, "review_staging", "review_staging_failed", err, attemptDir) + return attemptReview{}, err + } appendFailureAttempt(req.Loaded, "executor", classifyFailureKind("executor_failed", err), err, attemptDir) return attemptReview{}, err } @@ -550,11 +623,41 @@ func runExecutorAttempt(ctx context.Context, opts Options, loaded task.Task, pro } } + // Stage executor-produced worktree changes before capturing the snapshot + // Galley hands to the supervisor. Without this step, newly-created + // untracked files would not appear in the staged or unstaged diff surfaces + // and the supervisor would receive an empty diff for new-file work (D1 / + // AC1 / AC2). Non-committed task input file destinations are excluded so + // the staged review evidence is constrained to executor-produced changes + // and context-only inputs do not leak into the supervisor diff (AC4 / + // supervisor feedback on attempt 3). Staging failure is fatal: we surface + // a typed error so the caller records a `review_staging` attempt failure + // instead of sending an empty diff to the supervisor (AC6). The parent + // ctx (not attemptCtx) is used here so a staging step initiated after + // executor timeout still has a chance to capture worktree state and write + // its evidence file. + excludePaths := nonCommittedInputDestinations(loaded.Files) + if err := stageExecutorOutput(ctx, opts, workDir, attemptDir, excludePaths); err != nil { + return attemptOutcome{}, &reviewStagingError{Err: err} + } + diffSnapshot, diffErr := workspace.CaptureSnapshotFromBase(ctx, workDir, baseSHA, workspaceOptions(opts)) diffDirty := false diffText := "" if diffErr == nil { - diffDirty = diffSnapshot.Dirty + // Compute the supervisor-facing dirty signal from the submitted + // artifact set (branch diff + staged diff + unstaged diff), not from + // the raw worktree status. After review-time staging, executor- + // produced changes are reflected in StagedDiff while untracked + // entries that remain in StatusPorcelain are context-only Galley + // material (task.files declared commit:false, or other Galley-owned + // runtime context). Sourcing DiffDirty from the diff fields keeps + // the supervisor's progress / "has work to review" gate aligned + // with the submitted artifact instead of being widened by + // context-only worktree dirtiness (supervisor feedback on review- + // time scope). The raw worktree status is still persisted in + // git_status.json as diagnostic evidence. + diffDirty = diffSnapshot.BranchDiff != "" || diffSnapshot.StagedDiff != "" || diffSnapshot.UnstagedDiff != "" diffText = diffSnapshot.Diff if err := writeJSON(filepath.Join(attemptDir, "git_status.json"), diffSnapshot); err != nil { return attemptOutcome{}, err diff --git a/internal/daemon/path_set.go b/internal/daemon/path_set.go new file mode 100644 index 0000000..e7a2b64 --- /dev/null +++ b/internal/daemon/path_set.go @@ -0,0 +1,185 @@ +// Package daemon — review-staging path set builder. +// +// This file owns the explicit "reviewable path set" Galley computes after an +// executor attempt and before it captures the snapshot it hands to the +// supervisor. The path set is the daemon-side representation of the +// submitted artifact set: only the worktree paths that belong to the +// executor-produced diff are staged for review. Context-only Galley material +// (task.files entries declared with commit:false, and any other untracked +// content that is not part of the executor's submitted change set) is +// intentionally kept out of the path set so the supervisor diff/evidence and +// downstream progress signals reflect only what the executor actually +// submitted (D1 / AC1 / AC2 / AC4 / supervisor feedback on review-time +// scope). +// +// Forbidden-path entries are intentionally kept in the path set so the +// existing finalize-time forbidden_paths gate still observes them; review +// staging only excludes context-only Galley material, not safety-gated +// paths. +package daemon + +import ( + "path/filepath" + "strings" + + "github.com/shinpr/galley/internal/task" +) + +// reviewablePathsFromStatus extracts the worktree-relative paths Galley +// should stage for supervisor review from the raw output of +// `git status --porcelain=v1 -z` captured after the executor attempt. +// +// Each NUL-terminated entry is parsed into a single reviewable path (the new +// path for renames/copies, the only path otherwise), normalized to slash +// form, deduplicated, and filtered. The function drops: +// +// - empty entries and entries that normalize to "."; +// - non-local entries (filepath.IsLocal rejects absolute paths, +// drive-letter paths, and any segment that backs out of cwd so the +// review-staging set cannot widen beyond the executor's working +// directory regardless of how git status formatted the entry); +// - destinations in excludeDestinations — these are task.files entries +// declared with commit:false, which Galley materializes as context-only +// inputs the executor reads but does not submit. +// +// The order of returned paths follows the first occurrence in the porcelain +// stream so the staging command argv is deterministic across runs of the +// same input. +func reviewablePathsFromStatus(statusZ string, excludeDestinations []string) []string { + excludes := normalizedPathSet(excludeDestinations) + raw := parseStatusPorcelainZ(statusZ) + seen := make(map[string]bool, len(raw)) + var result []string + for _, p := range raw { + clean := normalizeReviewablePath(p) + if clean == "" { + continue + } + // Reject any path that escapes the worktree root. filepath.IsLocal + // returns false for absolute paths, drive-letter paths, and any + // segment that backs out of cwd, so the review-staging set cannot + // widen beyond the executor's working directory regardless of how + // git status formatted the entry. + if !filepath.IsLocal(clean) { + continue + } + if excludes[clean] { + continue + } + if seen[clean] { + continue + } + seen[clean] = true + result = append(result, clean) + } + return result +} + +// parseStatusPorcelainZ parses NUL-separated `git status --porcelain=v1 -z` +// output into the ordered list of paths the executor changed. Each entry has +// the byte layout "XYpath" for regular changes; rename/copy entries +// (X or Y in {R,C}) append a second "oldpath" token after the new +// path. The parser yields only the new path so the staged review set tracks +// the post-rename surface git presents to a reviewer. +// +// Malformed tails (a header without its NUL-terminated path) cause the +// parser to stop where it can no longer be sure of the next path boundary; +// returning a partial path would risk staging unrelated content. +func parseStatusPorcelainZ(s string) []string { + var paths []string + rest := s + for len(rest) > 0 { + // Each entry begins with two status bytes and a separator before the + // path. Anything shorter is a malformed tail; stop instead of + // returning a partial path. + if len(rest) < 4 { + return paths + } + x := rest[0] + y := rest[1] + // Skip the "XY " prefix. git uses a single space here for + // --porcelain=v1; the parser does not need to tolerate a tab because + // the v1 format documents a literal SP. + rest = rest[3:] + idx := strings.IndexByte(rest, 0) + if idx < 0 { + return paths + } + path := rest[:idx] + rest = rest[idx+1:] + if isRenameOrCopyStatus(x) || isRenameOrCopyStatus(y) { + // Rename/copy adds a second NUL-terminated token for the old + // path; we only care about the new path, so consume and discard + // the old one. + idx2 := strings.IndexByte(rest, 0) + if idx2 < 0 { + if path != "" { + paths = append(paths, path) + } + return paths + } + rest = rest[idx2+1:] + } + if path != "" { + paths = append(paths, path) + } + } + return paths +} + +func isRenameOrCopyStatus(c byte) bool { + return c == 'R' || c == 'C' +} + +// normalizedPathSet returns the set of worktree-relative paths in `paths`, +// normalized to slash form and with empty / "." entries dropped. The +// returned map is suitable for membership tests against +// normalizeReviewablePath output. +func normalizedPathSet(paths []string) map[string]bool { + set := make(map[string]bool, len(paths)) + for _, p := range paths { + clean := normalizeReviewablePath(p) + if clean == "" { + continue + } + set[clean] = true + } + return set +} + +// normalizeReviewablePath produces the canonical slash-form representation +// of a worktree-relative path used by the review-staging set. Returns "" +// when the input collapses to an empty path or to "." so callers can use +// the empty string as a sentinel for "drop this entry". +func normalizeReviewablePath(p string) string { + clean := strings.TrimSpace(p) + if clean == "" { + return "" + } + clean = filepath.ToSlash(filepath.Clean(clean)) + if clean == "" || clean == "." { + return "" + } + return clean +} + +// nonCommittedInputDestinations returns the worktree-relative destinations +// of task input files declared with commit:false. These are context-only +// inputs Galley materializes in the worktree before the executor runs; +// review-time staging must keep them out of the supervisor diff so +// reviewable evidence only reflects executor-produced changes (AC4 / +// supervisor feedback on review-time scope). +func nonCommittedInputDestinations(files []task.InputFile) []string { + var paths []string + for _, f := range files { + if f.Commit { + continue + } + dest := strings.TrimSpace(f.Destination) + if dest == "" { + continue + } + paths = append(paths, dest) + } + return paths +} diff --git a/internal/daemon/path_set_test.go b/internal/daemon/path_set_test.go new file mode 100644 index 0000000..fa61a20 --- /dev/null +++ b/internal/daemon/path_set_test.go @@ -0,0 +1,214 @@ +package daemon + +import ( + "reflect" + "strings" + "testing" +) + +// porcelainEntry helps build NUL-separated `git status --porcelain=v1 -z` +// fixtures without sprinkling literal "\x00" through the test bodies. +func porcelainEntry(xy, path string) string { + if len(xy) != 2 { + panic("xy must be 2 bytes") + } + return xy + " " + path + "\x00" +} + +// porcelainRename builds an "XY newpath\0oldpath\0" entry the way +// `git status --porcelain=v1 -z` reports renames and copies. +func porcelainRename(xy, newPath, oldPath string) string { + if len(xy) != 2 { + panic("xy must be 2 bytes") + } + return xy + " " + newPath + "\x00" + oldPath + "\x00" +} + +// TestReviewablePathsFromStatusKeepsModifiedAddedDeletedAndUntracked pins the +// baseline behavior: the path-set builder treats every change kind git +// reports — modified (` M`), added in index (`A `), deleted (` D`), and +// untracked (`??`) — as part of the executor-produced submitted artifact. +func TestReviewablePathsFromStatusKeepsModifiedAddedDeletedAndUntracked(t *testing.T) { + statusZ := porcelainEntry(" M", "src/touched.go") + + porcelainEntry("A ", "src/added.go") + + porcelainEntry(" D", "src/removed.go") + + porcelainEntry("??", "internal/new/file.go") + + got := reviewablePathsFromStatus(statusZ, nil) + want := []string{"src/touched.go", "src/added.go", "src/removed.go", "internal/new/file.go"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("reviewablePathsFromStatus = %v, want %v", got, want) + } +} + +// TestReviewablePathsFromStatusDedupesAndDropsEmpty pins the dedupe and +// trimming behavior. Repeated porcelain entries for the same logical path +// (which can arise when git reports rename source/destination separately +// and they happen to collapse to the same string after normalization) +// produce exactly one staging entry. Empty and "."-only entries are +// dropped so the staging argv never contains a placeholder pathspec. +func TestReviewablePathsFromStatusDedupesAndDropsEmpty(t *testing.T) { + statusZ := porcelainEntry("??", "internal/new/file.go") + + porcelainEntry("??", "internal/new/file.go") + // duplicate + porcelainEntry(" M", "./internal/new/file.go") + // normalizes to the same path + porcelainEntry("??", "") + // empty path token + porcelainEntry("??", ".") + // collapses to "." + porcelainEntry(" M", "other.go") + + got := reviewablePathsFromStatus(statusZ, nil) + want := []string{"internal/new/file.go", "other.go"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("reviewablePathsFromStatus = %v, want %v", got, want) + } +} + +// TestReviewablePathsFromStatusDropsNonLocal pins the locality rule. Any +// porcelain entry whose path escapes the worktree (an absolute path or a +// path with a "../" segment that backs out of cwd) is dropped, because the +// review-staging set must not widen beyond the executor's working +// directory regardless of how git status formatted the entry. +func TestReviewablePathsFromStatusDropsNonLocal(t *testing.T) { + statusZ := porcelainEntry("??", "/etc/passwd") + + porcelainEntry("??", "../escaped.go") + + porcelainEntry("??", "safe.go") + + got := reviewablePathsFromStatus(statusZ, nil) + want := []string{"safe.go"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("reviewablePathsFromStatus = %v, want %v", got, want) + } +} + +// TestReviewablePathsFromStatusExcludesCommitFalseDestinations pins the +// commit:false exclusion contract: task input file destinations declared +// with commit:false are context-only Galley material and must be kept out +// of the supervisor-submitted artifact even when they appear in the +// porcelain output (they are physically present in the worktree). The +// exclusion uses the same normalization rules as the include set so +// trailing whitespace, redundant "./" prefixes, and slash form differences +// do not let context inputs leak through. +func TestReviewablePathsFromStatusExcludesCommitFalseDestinations(t *testing.T) { + statusZ := porcelainEntry("??", "daemon-output.txt") + + porcelainEntry("??", "docs/plan.md") + + porcelainEntry("??", "docs/other.md") + + got := reviewablePathsFromStatus(statusZ, []string{" docs/plan.md ", "./docs/other.md"}) + want := []string{"daemon-output.txt"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("reviewablePathsFromStatus = %v, want %v", got, want) + } +} + +// TestReviewablePathsFromStatusKeepsForbiddenPathChanges pins the +// safety-gate boundary the supervisor feedback called out: review staging +// excludes context-only Galley material (commit:false inputs) but does not +// hide forbidden-path changes from the finalize-time gate. A path under +// task.scope.forbidden_paths must still appear in the staged review set +// (and therefore in the snapshot the forbidden_paths gate inspects in +// finalizeAcceptedChange) so the gate can fail the attempt instead of +// silently committing. +func TestReviewablePathsFromStatusKeepsForbiddenPathChanges(t *testing.T) { + statusZ := porcelainEntry("??", "secret/leak.txt") + + porcelainEntry("??", "src/legit.go") + + // "secret" is task.scope.forbidden_paths — not a commit:false + // destination. The path-set builder does not see forbidden paths; only + // the finalize-time gate does. So forbidden-path entries stay in the + // reviewable set. + got := reviewablePathsFromStatus(statusZ, nil) + want := []string{"secret/leak.txt", "src/legit.go"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("reviewablePathsFromStatus = %v, want %v", got, want) + } +} + +// TestReviewablePathsFromStatusNoChangeContextOnly pins the negative case +// the supervisor feedback called out explicitly: when the executor makes +// no repository change and the only worktree dirtiness is a commit:false +// input Galley materialized for context, the reviewable set is empty. +// Returning a nil/empty slice lets vcs.StagePathsForReview record a +// "skipped" evidence payload and short-circuit instead of staging the +// context input. +func TestReviewablePathsFromStatusNoChangeContextOnly(t *testing.T) { + statusZ := porcelainEntry("??", "docs/plan.md") + got := reviewablePathsFromStatus(statusZ, []string{"docs/plan.md"}) + if len(got) != 0 { + t.Fatalf("reviewablePathsFromStatus = %v, want empty", got) + } +} + +// TestReviewablePathsFromStatusRenameUsesNewPath pins the rename-handling +// contract: a rename/copy porcelain entry carries the new path first and +// the old path second, both NUL-terminated. Only the new path enters the +// reviewable set so the staged snapshot matches what the supervisor reads +// as the executor's submitted surface. +func TestReviewablePathsFromStatusRenameUsesNewPath(t *testing.T) { + statusZ := porcelainRename("R ", "src/new.go", "src/old.go") + + porcelainEntry(" M", "src/other.go") + got := reviewablePathsFromStatus(statusZ, nil) + want := []string{"src/new.go", "src/other.go"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("reviewablePathsFromStatus = %v, want %v", got, want) + } +} + +// TestReviewablePathsFromStatusEmptyInput pins the empty-status base case +// so callers always receive a nil/empty slice (never a partial panic) when +// `git status --porcelain` returns no entries. +func TestReviewablePathsFromStatusEmptyInput(t *testing.T) { + if got := reviewablePathsFromStatus("", nil); len(got) != 0 { + t.Fatalf("reviewablePathsFromStatus(empty) = %v, want empty", got) + } + if got := reviewablePathsFromStatus("", []string{"docs/plan.md"}); len(got) != 0 { + t.Fatalf("reviewablePathsFromStatus(empty, excludes) = %v, want empty", got) + } +} + +// TestReviewablePathsFromStatusMalformedTailStopsCleanly pins the +// malformed-input behavior: when the porcelain stream is truncated mid- +// entry (no terminating NUL for the path), the parser yields everything it +// confidently parsed and stops. Returning a partial path would risk +// staging unrelated content. +func TestReviewablePathsFromStatusMalformedTailStopsCleanly(t *testing.T) { + statusZ := porcelainEntry("??", "first.go") + "?? missing-nul-terminator" + got := reviewablePathsFromStatus(statusZ, nil) + want := []string{"first.go"} + if !reflect.DeepEqual(got, want) { + t.Fatalf("reviewablePathsFromStatus(malformed) = %v, want %v", got, want) + } +} + +// TestReviewablePathsFromStatusTrimsAndNormalizes pins that callers can +// pass status text with or without a final NUL — every NUL-terminated +// entry is honored, the trailing residue is not, and slash normalization +// is applied so paths the porcelain stream prints with backslashes (on +// Windows-style fixtures) still dedupe correctly against forward-slash +// excludes. +func TestReviewablePathsFromStatusTrimsAndNormalizes(t *testing.T) { + // Mixed slash forms simulate a Windows porcelain capture replayed + // against a forward-slash exclude list. + statusZ := porcelainEntry("??", strings.ReplaceAll("docs/plan.md", "/", "\\")) + + porcelainEntry("??", "docs/keep.md") + got := reviewablePathsFromStatus(statusZ, []string{"docs/plan.md"}) + // On non-Windows runtimes filepath.Clean does not collapse "\\" into + // "/", so the backslash entry is preserved literally and the exclusion + // only triggers on the matching forward-slash entry. The assertion + // matches that behavior on both OS families: docs/keep.md is always + // retained, and the backslash-form sibling is dropped from the + // reviewable set if and only if the active runtime treats it as the + // same logical path. + for _, p := range got { + if p == "docs/plan.md" { + t.Fatalf("commit:false exclusion was bypassed by normalization: %v", got) + } + } + found := false + for _, p := range got { + if p == "docs/keep.md" { + found = true + } + } + if !found { + t.Fatalf("docs/keep.md missing from reviewable set: %v", got) + } +} diff --git a/internal/daemon/review_staging_test.go b/internal/daemon/review_staging_test.go new file mode 100644 index 0000000..2b3ed02 --- /dev/null +++ b/internal/daemon/review_staging_test.go @@ -0,0 +1,536 @@ +package daemon + +import ( + "context" + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/shinpr/galley/internal/task" +) + +// TestRunOnceStagesNewUntrackedFileBeforeSupervisorReview pins AC1+AC2: when +// the fake executor creates a new file but does not run `git add`, Galley's +// review-time staging step must make that file visible in the attempt +// diff.patch so the supervisor evaluates the actual change instead of an +// empty submitted diff. +func TestRunOnceStagesNewUntrackedFileBeforeSupervisorReview(t *testing.T) { + root := filepath.Join(t.TempDir(), ".agent-workflow") + repo := initDaemonGitRepo(t) + promptPath, schemaPath := writeDaemonPromptFiles(t) + // Fake executor writes an untracked file but does NOT call `git add`. + claudeBin := writeFakeClaude(t, "echo executor-output > new-untracked-file.txt\necho '{\"status\":\"completed\",\"summary\":\"done\",\"files_modified\":[\"new-untracked-file.txt\"],\"acceptance_criteria\":[{\"id\":\"AC1\",\"status\":\"satisfied\",\"evidence\":[\"diff\"],\"notes\":\"done\"}],\"verification\":[],\"decisions\":[],\"risks\":[]}'\n") + taskPath := filepath.Join(root, "tasks", "queued", "task.yaml") + if err := os.MkdirAll(filepath.Dir(taskPath), 0o755); err != nil { + t.Fatal(err) + } + writeDaemonTask(t, taskPath, repo) + + if err := Run(context.Background(), Options{ + Root: root, + SystemPromptFile: promptPath, + JSONSchemaFile: schemaPath, + Supervisor: "claude", + ClaudeBin: claudeBin, + Once: true, + MaxConcurrentTasks: 1, + }); err != nil { + t.Fatal(err) + } + + // attempt diff.patch must contain a new-file header and the executor + // content for the previously untracked path. + diff := string(mustReadSingleGlob(t, filepath.Join(root, "runs", "*", "attempt-1", "diff.patch"))) + if !strings.Contains(diff, "new-untracked-file.txt") { + t.Fatalf("attempt diff.patch missing untracked file path:\n%s", diff) + } + if !strings.Contains(diff, "new file mode") { + t.Fatalf("attempt diff.patch missing new file header:\n%s", diff) + } + if !strings.Contains(diff, "executor-output") { + t.Fatalf("attempt diff.patch missing executor content:\n%s", diff) + } + + // AC6 evidence: review-time staging command result must be persisted. + assertGlobCount(t, filepath.Join(root, "runs", "*", "attempt-1", "git_add_review_result.json"), 1) +} + +// TestRunOnceAcceptedFinalizationCommitsStagedNewFile pins AC3: when the +// executor leaves a new file untracked and the supervisor accepts, the +// existing Galley finalization path still produces a final commit whose +// committed tree contains the new file. +func TestRunOnceAcceptedFinalizationCommitsStagedNewFile(t *testing.T) { + root := filepath.Join(t.TempDir(), ".agent-workflow") + repo := initDaemonGitRepo(t) + remote := filepath.Join(t.TempDir(), "remote.git") + runDaemonGit(t, t.TempDir(), "init", "--bare", remote) + runDaemonGit(t, repo, "remote", "add", "origin", remote) + promptPath, schemaPath := writeDaemonPromptFiles(t) + // Fake executor writes the file but does NOT call `git add`; the fake + // claude supervisor accepts any diff_dirty=true attempt that does not + // report parse/hard_stop/empty diff. + claudeBin := writeFakeClaude(t, "echo daemon-output-content > daemon-output.txt\necho '{\"status\":\"completed\",\"summary\":\"done\",\"files_modified\":[\"daemon-output.txt\"],\"acceptance_criteria\":[{\"id\":\"AC1\",\"status\":\"satisfied\",\"evidence\":[\"diff\"],\"notes\":\"done\"}],\"verification\":[],\"decisions\":[],\"risks\":[]}'\n") + ghBin := writeFakeCommand(t, "gh", "echo https://github.com/example/galley/pull/555\n") + taskPath := filepath.Join(root, "tasks", "queued", "task.yaml") + if err := os.MkdirAll(filepath.Dir(taskPath), 0o755); err != nil { + t.Fatal(err) + } + writeDaemonTask(t, taskPath, repo) + + if err := Run(context.Background(), Options{ + Root: root, + SystemPromptFile: promptPath, + JSONSchemaFile: schemaPath, + Supervisor: "claude", + ClaudeBin: claudeBin, + Once: true, + MaxConcurrentTasks: 1, + OpenPR: true, + PRBase: "main", + GHBin: ghBin, + }); err != nil { + t.Fatal(err) + } + + doneTask, err := task.Load(filepath.Join(root, "tasks", "done", "task.yaml")) + if err != nil { + t.Fatal(err) + } + if doneTask.Status != "pr_opened" { + t.Fatalf("status got %q want pr_opened", doneTask.Status) + } + worktreePath := taskWorktreePath(repo, doneTask.Worktree.Path) + // The committed HEAD tree must contain the previously untracked file. + committed := strings.TrimSpace(string(mustCommandOutput(t, "git", "-C", worktreePath, "show", "HEAD:daemon-output.txt"))) + if committed != "daemon-output-content" { + t.Fatalf("committed file content got %q want %q", committed, "daemon-output-content") + } +} + +// TestRunOnceAcceptedFinalizationExcludesNonCommittedInputFile pins AC4: +// review-time staging must not cause a commit:false input file to be +// committed. After the executor (without running `git add`) introduces a +// real change, the final commit must include only the real change, not the +// non-committed input file Galley placed in the worktree. +func TestRunOnceAcceptedFinalizationExcludesNonCommittedInputFile(t *testing.T) { + root := filepath.Join(t.TempDir(), ".agent-workflow") + repo := initDaemonGitRepo(t) + remote := filepath.Join(t.TempDir(), "remote.git") + runDaemonGit(t, t.TempDir(), "init", "--bare", remote) + runDaemonGit(t, repo, "remote", "add", "origin", remote) + promptPath, schemaPath := writeDaemonPromptFiles(t) + inputPath := filepath.Join(t.TempDir(), "plan.md") + if err := os.WriteFile(inputPath, []byte("design note from plan\n"), 0o600); err != nil { + t.Fatal(err) + } + claudeBin := writeFakeClaude(t, "echo change > daemon-output.txt\necho '{\"status\":\"completed\",\"summary\":\"done\",\"files_modified\":[\"daemon-output.txt\"],\"acceptance_criteria\":[{\"id\":\"AC1\",\"status\":\"satisfied\",\"evidence\":[\"diff\"],\"notes\":\"done\"}],\"verification\":[],\"decisions\":[],\"risks\":[]}'\n") + ghBin := writeFakeCommand(t, "gh", "echo https://github.com/example/galley/pull/777\n") + taskPath := filepath.Join(root, "tasks", "queued", "task.yaml") + if err := os.MkdirAll(filepath.Dir(taskPath), 0o755); err != nil { + t.Fatal(err) + } + writeDaemonTask(t, taskPath, repo) + loaded, err := task.Load(taskPath) + if err != nil { + t.Fatal(err) + } + loaded.Files = []task.InputFile{{ + Source: inputPath, + Destination: "docs/plan.md", + Description: "design plan", + Commit: false, + }} + if err := task.Save(taskPath, loaded); err != nil { + t.Fatal(err) + } + + if err := Run(context.Background(), Options{ + Root: root, + SystemPromptFile: promptPath, + JSONSchemaFile: schemaPath, + Supervisor: "claude", + ClaudeBin: claudeBin, + Once: true, + MaxConcurrentTasks: 1, + OpenPR: true, + PRBase: "main", + GHBin: ghBin, + }); err != nil { + t.Fatal(err) + } + + doneTask, err := task.Load(filepath.Join(root, "tasks", "done", "task.yaml")) + if err != nil { + t.Fatal(err) + } + worktreePath := taskWorktreePath(repo, doneTask.Worktree.Path) + // Non-committed input file destination must not be present in the HEAD + // tree even though review-time staging happened before finalization. + committedFiles, err := exec.Command("git", "-C", worktreePath, "show", "--name-only", "--format=", "HEAD").Output() + if err != nil { + t.Fatal(err) + } + if strings.Contains(string(committedFiles), "docs/plan.md") { + t.Fatalf("commit:false input file leaked into HEAD commit:\n%s", committedFiles) + } + if !strings.Contains(string(committedFiles), "daemon-output.txt") { + t.Fatalf("expected executor diff to be committed:\n%s", committedFiles) + } +} + +// TestRunOnceAcceptedFinalizationDetectsForbiddenPathAfterStaging pins AC5: +// review-time staging does not bypass the forbidden-path check in accepted +// finalization. When the executor creates a file under +// task.scope.forbidden_paths and does not stage it, finalization must still +// fail with the existing forbidden-path error and the task must not reach +// pr_opened. +func TestRunOnceAcceptedFinalizationDetectsForbiddenPathAfterStaging(t *testing.T) { + root := filepath.Join(t.TempDir(), ".agent-workflow") + repo := initDaemonGitRepo(t) + remote := filepath.Join(t.TempDir(), "remote.git") + runDaemonGit(t, t.TempDir(), "init", "--bare", remote) + runDaemonGit(t, repo, "remote", "add", "origin", remote) + promptPath, schemaPath := writeDaemonPromptFiles(t) + // Executor writes a file under a forbidden directory (no `git add`). + claudeBin := writeFakeClaude(t, "mkdir -p secret\necho secret > secret/leak.txt\necho '{\"status\":\"completed\",\"summary\":\"done\",\"files_modified\":[\"secret/leak.txt\"],\"acceptance_criteria\":[{\"id\":\"AC1\",\"status\":\"satisfied\",\"evidence\":[\"diff\"],\"notes\":\"done\"}],\"verification\":[],\"decisions\":[],\"risks\":[]}'\n") + ghBin := writeFakeCommand(t, "gh", "echo https://github.com/example/galley/pull/888\n") + taskPath := filepath.Join(root, "tasks", "queued", "task.yaml") + if err := os.MkdirAll(filepath.Dir(taskPath), 0o755); err != nil { + t.Fatal(err) + } + writeDaemonTask(t, taskPath, repo) + loaded, err := task.Load(taskPath) + if err != nil { + t.Fatal(err) + } + loaded.Scope.ForbiddenPaths = []string{"secret"} + if err := task.Save(taskPath, loaded); err != nil { + t.Fatal(err) + } + + // AC5: finalize must surface the forbidden-path failure. Run reports the + // failure via its return value (matching the daemon's other terminal + // failure paths such as TestRunOnceFailsWhenPRBaseRefMissing) and also + // moves the task to tasks/failed for inspection. + runErr := Run(context.Background(), Options{ + Root: root, + SystemPromptFile: promptPath, + JSONSchemaFile: schemaPath, + Supervisor: "claude", + ClaudeBin: claudeBin, + Once: true, + MaxConcurrentTasks: 1, + OpenPR: true, + PRBase: "main", + GHBin: ghBin, + }) + if runErr == nil { + t.Fatal("expected forbidden-path failure to be surfaced by Run") + } + if !strings.Contains(runErr.Error(), "task.scope.forbidden_paths") { + t.Fatalf("Run error does not mention forbidden_paths: %v", runErr) + } + + failedTask, err := task.Load(filepath.Join(root, "tasks", "failed", "task.yaml")) + if err != nil { + t.Fatalf("task did not move to failed/: %v", err) + } + if failedTask.Status == "pr_opened" || failedTask.Status == "accepted" { + t.Fatalf("forbidden-path acceptance was not blocked: status=%q", failedTask.Status) + } + foundForbiddenRisk := false + for _, risk := range failedTask.Risks { + if strings.Contains(risk.Detail, "task.scope.forbidden_paths") { + foundForbiddenRisk = true + break + } + } + if !foundForbiddenRisk { + t.Fatalf("expected forbidden-path risk in failed task:\n%#v", failedTask.Risks) + } +} + +// TestRunOnceReviewStagingFailureRecordsAttemptErrorBeforeSupervisor pins AC6: +// when the review-time `git add -A` step fails, the attempt must surface a +// staging-related attempt error (phase=review_staging, kind=review_staging_failed) +// and the supervisor must not be invoked with an empty diff. We swap out the +// staging seam used by runExecutorAttempt to inject a deterministic failure +// without depending on platform-specific git misbehavior. +func TestRunOnceReviewStagingFailureRecordsAttemptErrorBeforeSupervisor(t *testing.T) { + root := filepath.Join(t.TempDir(), ".agent-workflow") + repo := initDaemonGitRepo(t) + promptPath, schemaPath := writeDaemonPromptFiles(t) + claudeBin := writeFakeClaude(t, "echo change > daemon-output.txt\necho '{\"status\":\"completed\",\"summary\":\"done\",\"files_modified\":[\"daemon-output.txt\"],\"acceptance_criteria\":[{\"id\":\"AC1\",\"status\":\"satisfied\",\"evidence\":[\"diff\"],\"notes\":\"done\"}],\"verification\":[],\"decisions\":[],\"risks\":[]}'\n") + taskPath := filepath.Join(root, "tasks", "queued", "task.yaml") + if err := os.MkdirAll(filepath.Dir(taskPath), 0o755); err != nil { + t.Fatal(err) + } + writeDaemonTask(t, taskPath, repo) + + // Override the staging seam to capture invocation evidence under the + // attempt dir (so file-based evidence still exists for review) and then + // return an error that mimics a real `git add -A` failure. + prev := stageExecutorOutput + stageExecutorOutput = func(_ context.Context, _ Options, workDir, attemptDir string, _ []string) error { + if err := os.MkdirAll(attemptDir, 0o700); err != nil { + return err + } + if err := os.WriteFile(filepath.Join(attemptDir, "git_add_review_result.json"), []byte(`{"exit_code":128}`), 0o600); err != nil { + return err + } + return fmt.Errorf("git add -A (review staging) failed: simulated index lock") + } + t.Cleanup(func() { stageExecutorOutput = prev }) + + // AC6: a review-staging failure is a terminal attempt error. Run surfaces + // the wrapped error to its caller (mirroring other daemon failure modes + // like TestRunOnceFailsWhenPRBaseRefMissing) and moves the task to + // tasks/failed with the staging-classified attempt error attached. + runErr := Run(context.Background(), Options{ + Root: root, + SystemPromptFile: promptPath, + JSONSchemaFile: schemaPath, + Supervisor: "claude", + ClaudeBin: claudeBin, + Once: true, + MaxConcurrentTasks: 1, + }) + if runErr == nil { + t.Fatal("expected review-staging failure to be surfaced by Run") + } + if !strings.Contains(runErr.Error(), "review staging") && !strings.Contains(runErr.Error(), "git add") { + t.Fatalf("Run error does not mention review staging: %v", runErr) + } + + failedTask, err := task.Load(filepath.Join(root, "tasks", "failed", "task.yaml")) + if err != nil { + t.Fatalf("task did not move to failed/: %v", err) + } + if len(failedTask.Attempts) == 0 { + t.Fatal("expected at least one recorded attempt") + } + last := failedTask.Attempts[len(failedTask.Attempts)-1] + if last.Error == nil { + t.Fatalf("expected attempt error, got nil") + } + if last.Error.Phase != "review_staging" || last.Error.Kind != "review_staging_failed" { + t.Fatalf("attempt error got phase=%q kind=%q, want review_staging/review_staging_failed (%#v)", last.Error.Phase, last.Error.Kind, last.Error) + } + if !strings.Contains(last.Error.Message, "review staging") && !strings.Contains(last.Error.Message, "git add") { + t.Fatalf("attempt error message does not mention staging: %q", last.Error.Message) + } + // AC6: when staging fails, supervisor must not have been invoked for that + // attempt — i.e., no supervisor verdict file was written. + assertGlobCount(t, filepath.Join(root, "runs", "*", "attempt-1", "supervisor_verdict.json"), 0) +} + +// TestRunOnceReviewStagingExcludesNonCommittedInputFromAttemptDiff pins the +// review-time scope rule the supervisor flagged on attempt 3: review-time +// staging/evidence must be constrained to executor-produced changes. When a +// commit:false task input file is materialized in the worktree alongside a +// separate, executor-created untracked output, the attempt diff.patch (which +// is the same diff Galley hands to the supervisor) must include the +// executor's output and must NOT include the commit:false destination — even +// though `git add -A` would otherwise pick it up. +func TestRunOnceReviewStagingExcludesNonCommittedInputFromAttemptDiff(t *testing.T) { + root := filepath.Join(t.TempDir(), ".agent-workflow") + repo := initDaemonGitRepo(t) + promptPath, schemaPath := writeDaemonPromptFiles(t) + inputPath := filepath.Join(t.TempDir(), "plan.md") + if err := os.WriteFile(inputPath, []byte("design note from plan\n"), 0o600); err != nil { + t.Fatal(err) + } + // Fake executor writes a separate untracked output and does NOT call + // `git add`. The commit:false input file is placed by Galley earlier in + // preparation; the executor never touches it. + claudeBin := writeFakeClaude(t, "echo executor-output > daemon-output.txt\necho '{\"status\":\"completed\",\"summary\":\"done\",\"files_modified\":[\"daemon-output.txt\"],\"acceptance_criteria\":[{\"id\":\"AC1\",\"status\":\"satisfied\",\"evidence\":[\"diff\"],\"notes\":\"done\"}],\"verification\":[],\"decisions\":[],\"risks\":[]}'\n") + taskPath := filepath.Join(root, "tasks", "queued", "task.yaml") + if err := os.MkdirAll(filepath.Dir(taskPath), 0o755); err != nil { + t.Fatal(err) + } + writeDaemonTask(t, taskPath, repo) + loaded, err := task.Load(taskPath) + if err != nil { + t.Fatal(err) + } + loaded.Files = []task.InputFile{{ + Source: inputPath, + Destination: "docs/plan.md", + Description: "design plan", + Commit: false, + }} + if err := task.Save(taskPath, loaded); err != nil { + t.Fatal(err) + } + + if err := Run(context.Background(), Options{ + Root: root, + SystemPromptFile: promptPath, + JSONSchemaFile: schemaPath, + Supervisor: "claude", + ClaudeBin: claudeBin, + Once: true, + MaxConcurrentTasks: 1, + }); err != nil { + t.Fatal(err) + } + + // The diff.patch is the diff Galley hands to the supervisor as part of + // the attempt evidence. It must reflect only the executor-produced change. + diff := string(mustReadSingleGlob(t, filepath.Join(root, "runs", "*", "attempt-1", "diff.patch"))) + if !strings.Contains(diff, "daemon-output.txt") { + t.Fatalf("attempt diff.patch missing executor output:\n%s", diff) + } + if !strings.Contains(diff, "executor-output") { + t.Fatalf("attempt diff.patch missing executor content:\n%s", diff) + } + if strings.Contains(diff, "docs/plan.md") { + t.Fatalf("commit:false input destination leaked into attempt diff.patch:\n%s", diff) + } + if strings.Contains(diff, "design note from plan") { + t.Fatalf("commit:false input content leaked into attempt diff.patch:\n%s", diff) + } + + // Defense in depth: the snapshot Galley writes as git_status.json carries + // the staged_diff, unstaged_diff, and unioned diff fields used to populate + // the supervisor Evidence.Diff. The status_porcelain field legitimately + // reports untracked files (harmless because Evidence.Diff is derived from + // the diff fields, not status), so the assertion targets the diff fields + // only. + statusBytes := mustReadSingleGlob(t, filepath.Join(root, "runs", "*", "attempt-1", "git_status.json")) + for _, field := range []string{`"staged_diff"`, `"unstaged_diff"`, `"diff"`} { + val := extractJSONStringField(string(statusBytes), field) + if strings.Contains(val, "docs/plan.md") || strings.Contains(val, "design note from plan") { + t.Fatalf("commit:false input leaked into snapshot %s field: %q", field, val) + } + } +} + +// TestRunOnceReviewStagingDoesNotPresentContextInputAsSubmittedDiff pins the +// negative adjacent case: when the executor creates NO real change but a +// commit:false input file is present in the worktree, Galley must not let +// the context-only input show up as a submitted diff to the supervisor. +// In other words, the attempt diff.patch must be empty of any reference to +// the commit:false destination — the worktree change is context, not work. +func TestRunOnceReviewStagingDoesNotPresentContextInputAsSubmittedDiff(t *testing.T) { + root := filepath.Join(t.TempDir(), ".agent-workflow") + repo := initDaemonGitRepo(t) + promptPath, schemaPath := writeDaemonPromptFiles(t) + inputPath := filepath.Join(t.TempDir(), "plan.md") + if err := os.WriteFile(inputPath, []byte("design note from plan\n"), 0o600); err != nil { + t.Fatal(err) + } + // Fake executor does not modify any file; it only emits a result JSON. + claudeBin := writeFakeClaude(t, "echo '{\"status\":\"completed\",\"summary\":\"done\",\"files_modified\":[],\"acceptance_criteria\":[{\"id\":\"AC1\",\"status\":\"satisfied\",\"evidence\":[\"no diff\"],\"notes\":\"done\"}],\"verification\":[],\"decisions\":[],\"risks\":[]}'\n") + taskPath := filepath.Join(root, "tasks", "queued", "task.yaml") + if err := os.MkdirAll(filepath.Dir(taskPath), 0o755); err != nil { + t.Fatal(err) + } + writeDaemonTask(t, taskPath, repo) + loaded, err := task.Load(taskPath) + if err != nil { + t.Fatal(err) + } + loaded.Files = []task.InputFile{{ + Source: inputPath, + Destination: "docs/plan.md", + Description: "design plan", + Commit: false, + }} + if err := task.Save(taskPath, loaded); err != nil { + t.Fatal(err) + } + + if err := Run(context.Background(), Options{ + Root: root, + SystemPromptFile: promptPath, + JSONSchemaFile: schemaPath, + Supervisor: "claude", + ClaudeBin: claudeBin, + Once: true, + MaxConcurrentTasks: 1, + }); err != nil { + t.Fatal(err) + } + + // The submitted diff (== Evidence.Diff handed to the supervisor) must not + // reference the context input — even though that input is physically in + // the worktree. The diff.patch may be empty (no executor change), which + // is the correct submitted-diff representation for "no work was done". + diff := string(mustReadSingleGlob(t, filepath.Join(root, "runs", "*", "attempt-1", "diff.patch"))) + if strings.Contains(diff, "docs/plan.md") { + t.Fatalf("commit:false context input presented as submitted diff:\n%s", diff) + } + if strings.Contains(diff, "design note from plan") { + t.Fatalf("commit:false context input content presented as submitted diff:\n%s", diff) + } +} + +// TestNonCommittedInputDestinations documents the helper used by the daemon +// loop to derive the review-time exclude list from a task's input files. The +// helper trims, skips empty destinations, and ignores commit:true entries. +func TestNonCommittedInputDestinations(t *testing.T) { + files := []task.InputFile{ + {Source: "/tmp/a", Destination: "docs/a.md", Commit: false}, + {Source: "/tmp/b", Destination: " docs/b.md ", Commit: false}, + {Source: "/tmp/c", Destination: "docs/c.md", Commit: true}, + {Source: "/tmp/d", Destination: "", Commit: false}, + } + got := nonCommittedInputDestinations(files) + want := []string{"docs/a.md", "docs/b.md"} + if len(got) != len(want) { + t.Fatalf("got %v want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Fatalf("got[%d]=%q want %q", i, got[i], want[i]) + } + } +} + +// extractJSONStringField returns the (still-escaped) string value of the +// supplied top-level JSON object key from a raw JSON object string. The key +// argument must include its surrounding quotes (e.g. `"diff"`). The helper +// keeps review-staging diff assertions readable without taking a dependency +// on the workspace package's internal types and without decoding the entire +// snapshot. +func extractJSONStringField(s, key string) string { + idx := strings.Index(s, key+":\"") + if idx < 0 { + return "" + } + rest := s[idx+len(key)+2:] + end := 0 + for end < len(rest) { + if rest[end] == '\\' && end+1 < len(rest) { + end += 2 + continue + } + if rest[end] == '"' { + break + } + end++ + } + return rest[:end] +} + +// TestReviewStagingErrorClassification documents the helper used by the +// daemon loop to distinguish a review-time staging failure from a generic +// executor failure (AC6). +func TestReviewStagingErrorClassification(t *testing.T) { + wrapped := &reviewStagingError{Err: errors.New("boom")} + if got, ok := asReviewStagingError(wrapped); !ok || got != wrapped { + t.Fatalf("expected reviewStagingError to be recognized directly") + } + nested := fmt.Errorf("wrapped: %w", wrapped) + if _, ok := asReviewStagingError(nested); !ok { + t.Fatalf("expected wrapped reviewStagingError to be unwrappable") + } + if _, ok := asReviewStagingError(errors.New("plain")); ok { + t.Fatalf("plain errors must not be classified as review staging failures") + } +} diff --git a/internal/vcs/vcs.go b/internal/vcs/vcs.go index d9b9928..33d0783 100644 --- a/internal/vcs/vcs.go +++ b/internal/vcs/vcs.go @@ -81,6 +81,7 @@ func addPathsForOS(ctx context.Context, bins Binaries, workDir, runDir string, p if len(stagePaths) == 0 { return fmt.Errorf("git add paths is empty") } + pathspecs := literalPathspecs(stagePaths) cmd := runner.Command{WorkDir: workDir} if goos == "windows" { // git add --pathspec-from-file=- --pathspec-file-nul reads pathspecs @@ -88,9 +89,9 @@ func addPathsForOS(ctx context.Context, bins Binaries, workDir, runDir string, p // NUL separator avoids any ambiguity with paths that contain LF or // other whitespace characters. cmd.Argv = []string{bins.git(), "add", "-A", "--pathspec-from-file=-", "--pathspec-file-nul"} - cmd.Stdin = strings.Join(stagePaths, "\x00") + "\x00" + cmd.Stdin = strings.Join(pathspecs, "\x00") + "\x00" } else { - cmd.Argv = append([]string{bins.git(), "add", "-A", "--"}, stagePaths...) + cmd.Argv = append([]string{bins.git(), "add", "-A", "--"}, pathspecs...) } result, err := runner.RunCommand(ctx, cmd, runner.RunOptions{ StdoutPath: filepath.Join(runDir, "git_add.stdout.log"), @@ -106,6 +107,151 @@ func addPathsForOS(ctx context.Context, bins Binaries, workDir, runDir string, p return nil } +// StatusPorcelainZ returns the NUL-separated `git status --porcelain=v1 -z` +// output for the worktree. Galley calls this in the review-staging step to +// discover the explicit set of executor-produced changes before computing +// the reviewable path set (the daemon-side representation of the submitted +// artifact). The function does not write evidence files because the caller +// captures the resolved reviewable path set itself in the staging command's +// argv evidence (see StagePathsForReview). +// +// --untracked-files=all is required: without it, git status collapses an +// untracked directory to a single trailing-slash entry (e.g. "docs/"), and a +// commit:false destination such as "docs/plan.md" would not match the +// directory-shaped entry. Galley would then stage the entire untracked +// directory and leak the context-only input into the supervisor diff. The +// `all` mode emits one entry per untracked file, which is the resolution +// the reviewable-path-set contract requires. +func StatusPorcelainZ(ctx context.Context, bins Binaries, workDir string) (string, error) { + result, err := runner.RunCommand(ctx, runner.Command{ + WorkDir: workDir, + Argv: []string{bins.git(), "status", "--porcelain=v1", "-z", "--untracked-files=all"}, + }, runner.RunOptions{}) + if err != nil { + return "", fmt.Errorf("git status --porcelain -z (review staging discovery) failed: %w", err) + } + return result.Stdout, nil +} + +// StagePathsForReview stages the supplied worktree-relative paths so the +// supervisor diff snapshot captures the executor-produced change set Galley +// computed from `git status --porcelain` after the executor attempt. The +// caller is expected to have already: +// +// - discovered the change set with StatusPorcelainZ; +// - normalized and deduplicated each entry to slash form; +// - excluded task.files entries declared with commit:false so context-only +// inputs Galley materializes for the executor do not enter the +// submitted artifact. +// +// `paths` is therefore an explicit reviewable path set, not a wildcard or an +// `-A` over the whole worktree. The function runs +// `git add -A -- [...]` so paths that the executor added, +// modified, or deleted are all reflected in the index. On Windows the +// pathspecs are delivered via `--pathspec-from-file=- --pathspec-file-nul` +// so a long list of changed files cannot push the CreateProcess command +// line past the platform's argv limit; macOS and Linux continue to pass the +// pathspecs on argv. +// +// When `paths` is empty (the no-executor-change context-only case) the +// function records a "skipped" evidence payload at +// `/git_add_review_result.json` and returns nil. Falling through to +// `git add -A` with no positional path would stage every dirty path in the +// worktree, defeating the explicit reviewable-set contract; falling through +// to `git add -A --` with no positional path would error on some git +// versions. Recording the skipped evidence preserves the AC6 invariant that +// the staging step is always observable in run evidence. +// +// On a non-skipped run the function writes: +// +// - git_add_review.stdout.log +// - git_add_review.stderr.log +// - git_add_review_result.json (runner.RunResult) +// +// A staging failure returns the original git error joined with any evidence +// write error so the caller can record a clear attempt failure instead of +// handing an empty or stale diff to the supervisor. +func StagePathsForReview(ctx context.Context, bins Binaries, workDir, runDir string, paths []string) error { + return stagePathsForReviewForOS(ctx, bins, workDir, runDir, paths, runtime.GOOS) +} + +func stagePathsForReviewForOS(ctx context.Context, bins Binaries, workDir, runDir string, paths []string, goos string) error { + stagePaths := dedupeReviewPaths(paths) + if len(stagePaths) == 0 { + // The executor produced no reviewable change. Persist a skipped + // evidence payload so reviewers can still confirm review staging + // ran for this attempt (AC6) and then return without calling + // git add at all. Without this short-circuit, the alternative + // `git add -A` over the whole worktree would re-stage every + // dirty path (including context-only commit:false inputs), + // defeating the explicit reviewable-set contract. + return writeReviewStagingSkipped(runDir, "no executor-produced paths to stage") + } + pathspecs := literalPathspecs(stagePaths) + cmd := runner.Command{WorkDir: workDir} + if goos == "windows" { + // On Windows the pathspec list is delivered through stdin so a + // long list of changed files cannot push the CreateProcess command + // line past the platform's argv limit. The NUL separator avoids + // any ambiguity with paths that contain LF or other whitespace. + cmd.Argv = []string{bins.git(), "add", "-A", "--pathspec-from-file=-", "--pathspec-file-nul"} + cmd.Stdin = strings.Join(pathspecs, "\x00") + "\x00" + } else { + cmd.Argv = append([]string{bins.git(), "add", "-A", "--"}, pathspecs...) + } + result, err := runner.RunCommand(ctx, cmd, runner.RunOptions{ + StdoutPath: filepath.Join(runDir, "git_add_review.stdout.log"), + StderrPath: filepath.Join(runDir, "git_add_review.stderr.log"), + }) + writeErr := writeJSON(filepath.Join(runDir, "git_add_review_result.json"), result) + if err != nil { + return errors.Join(fmt.Errorf("git add -A (review staging) failed: %w", err), writeErr) + } + if writeErr != nil { + return writeErr + } + return nil +} + +// writeReviewStagingSkipped records a sentinel evidence payload when the +// review-staging step is intentionally skipped because there were no +// executor-produced paths to stage. The payload mirrors the field shape of +// the runner.RunResult-derived evidence used on the non-skipped path so +// readers can detect "skipped" by checking the dedicated key without +// pattern-matching command output. +func writeReviewStagingSkipped(runDir, reason string) error { + return writeJSON(filepath.Join(runDir, "git_add_review_result.json"), map[string]any{ + "skipped": true, + "reason": reason, + }) +} + +// dedupeReviewPaths normalizes and deduplicates worktree-relative review +// paths. It accepts already-normalized input from the daemon (see +// reviewablePathsFromStatus) and is defensive about callers that pass in +// pre-normalized but duplicated entries. +func dedupeReviewPaths(paths []string) []string { + out := make([]string, 0, len(paths)) + seen := make(map[string]bool, len(paths)) + for _, p := range paths { + clean := filepath.ToSlash(filepath.Clean(p)) + if clean == "" || clean == "." || seen[clean] { + continue + } + seen[clean] = true + out = append(out, clean) + } + return out +} + +func literalPathspecs(paths []string) []string { + out := make([]string, 0, len(paths)) + for _, path := range paths { + out = append(out, ":(literal)"+path) + } + return out +} + // Commit creates a git commit and writes command evidence. func Commit(ctx context.Context, bins Binaries, workDir, runDir, message string) error { result, err := runner.RunCommand(ctx, runner.Command{ diff --git a/internal/vcs/vcs_test.go b/internal/vcs/vcs_test.go index de609cc..7c64689 100644 --- a/internal/vcs/vcs_test.go +++ b/internal/vcs/vcs_test.go @@ -2,6 +2,7 @@ package vcs import ( "os" + "os/exec" "path/filepath" "runtime" "strings" @@ -217,6 +218,56 @@ func skipPOSIXFakeGHOnWindows(t *testing.T) { } } +func TestStagePathsForReviewTreatsPathspecMagicAsLiteral(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("Windows filenames cannot contain ':'") + } + repo := t.TempDir() + runDir := t.TempDir() + runGit(t, repo, "init", "-q") + runGit(t, repo, "config", "user.email", "a@example.test") + runGit(t, repo, "config", "user.name", "A") + if err := os.WriteFile(filepath.Join(repo, "base.txt"), []byte("base\n"), 0o600); err != nil { + t.Fatal(err) + } + runGit(t, repo, "add", "base.txt") + runGit(t, repo, "commit", "-q", "-m", "init") + if err := os.WriteFile(filepath.Join(repo, ":(glob)*"), []byte("magic\n"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(repo, "other.txt"), []byte("other\n"), 0o600); err != nil { + t.Fatal(err) + } + + if err := StagePathsForReview(t.Context(), Binaries{}, repo, runDir, []string{":(glob)*"}); err != nil { + t.Fatal(err) + } + + got := strings.TrimSpace(string(runGitOutput(t, repo, "diff", "--cached", "--name-only"))) + if got != ":(glob)*" { + t.Fatalf("staged paths got %q, want only literal magic filename", got) + } +} + +func runGit(t *testing.T, dir string, args ...string) { + t.Helper() + cmd := exec.Command("git", append([]string{"-C", dir}, args...)...) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("git %v failed: %v\n%s", args, err, out) + } +} + +func runGitOutput(t *testing.T, dir string, args ...string) []byte { + t.Helper() + cmd := exec.Command("git", append([]string{"-C", dir}, args...)...) + out, err := cmd.CombinedOutput() + if err != nil { + t.Fatalf("git %v failed: %v\n%s", args, err, out) + } + return out +} + func TestExtractFirstHTTPSURLTrimsTrailingPunctuation(t *testing.T) { t.Parallel() got := ExtractFirstHTTPSURL("created (https://github.com/example/galley/pull/123).")