Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ This project follows semantic versioning.

## Unreleased

## v0.7.0 - 2026-05-25

### Added

- Setup executor preflight phase and `environment.yaml` `setup.commands[]`. Galley now prepares fresh task worktrees before acceptance skeleton creation and implementation by running authored setup commands or dispatching a setup executor to learn a reusable setup plan, which is persisted back to `environment.yaml` on success.
Expand All @@ -14,6 +16,11 @@ This project follows semantic versioning.
### Changed

- Packaged Claude and Codex Galley plugins are now versioned as `0.1.13`: setup executor prompts now include explicit result JSON contracts and troubleshooting guidance routes `setup_failed` diagnosis through setup run evidence.
- The daemon now stages the executor-produced reviewable path set before capturing supervisor diff evidence, so newly created untracked files appear in `diff.patch` and supervisor review while `commit:false` input files and unrelated context-only worktree dirtiness stay out of the submitted diff. Review staging writes `git_add_review.stdout.log`, `git_add_review.stderr.log`, and `git_add_review_result.json` attempt evidence when it runs, and records a skipped result when there is no reviewable path set.

### Fixed

- Review-time staging failures are now recorded as attempt errors with `phase=review_staging` / `kind=review_staging_failed`; Galley does not invoke the supervisor with an empty or stale diff and moves the task to `tasks/failed` with staging-specific evidence for operator diagnosis.

## v0.6.2 - 2026-05-25

Expand Down
105 changes: 104 additions & 1 deletion internal/daemon/loop.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,74 @@ import (
"github.com/shinpr/galley/internal/supervisor"
"github.com/shinpr/galley/internal/task"
"github.com/shinpr/galley/internal/taskstate"
"github.com/shinpr/galley/internal/vcs"
"github.com/shinpr/galley/internal/workspace"
)

// reviewStagingError signals that Galley's review-time `git add -A` step
// failed after the executor exited and before the supervisor evaluation
// would have been driven against an empty or stale diff. The loop
// classification path (runOneSupervisorAttempt) inspects this type to record
// the failure with a distinct `review_staging` phase / `review_staging_failed`
// kind instead of the generic executor failure classification (AC6).
type reviewStagingError struct{ Err error }

func (e *reviewStagingError) Error() string {
if e == nil || e.Err == nil {
return "review staging failed"
}
return e.Err.Error()
}

func (e *reviewStagingError) Unwrap() error {
if e == nil {
return nil
}
return e.Err
}

func asReviewStagingError(err error) (*reviewStagingError, bool) {
var rse *reviewStagingError
if errors.As(err, &rse) {
return rse, true
}
return nil, false
}

// stageExecutorOutput is the package-level seam used by runExecutorAttempt to
// stage the executor-produced worktree changes Galley hands to the
// supervisor.
//
// The implementation runs in three steps:
//
// 1. Discover the dirty worktree paths via `git status --porcelain=v1 -z`
// after the executor returned.
// 2. Build the explicit reviewable path set with reviewablePathsFromStatus.
// The builder drops empty/non-local entries, deduplicates, and excludes
// the supplied excludePaths (task.files entries declared with
// commit:false). Forbidden-path entries are intentionally kept in the
// set so the existing finalize-time forbidden_paths gate still observes
// them.
// 3. Stage exactly that explicit path set with vcs.StagePathsForReview. The
// staging command runs `git add -A -- <path> [<path>...]` so the diff
// fields the supervisor reviews (StagedDiff / Diff in the snapshot)
// reflect the executor's submitted artifact and nothing else.
//
// Tests override this seam to inject deterministic failures and to assert
// the exclude-list contract without spawning a real git process; production
// callers always go through the three-step flow above. The signature keeps
// excludePaths visible at the seam so failure-path tests can document the
// shape of the contract.
var stageExecutorOutput = func(ctx context.Context, opts Options, workDir, attemptDir string, excludePaths []string) error {
bins := vcsBinaries(opts)
statusZ, err := vcs.StatusPorcelainZ(ctx, bins, workDir)
if err != nil {
return err
}
reviewable := reviewablePathsFromStatus(statusZ, excludePaths)
return vcs.StagePathsForReview(ctx, bins, workDir, attemptDir, reviewable)
}

const progressNoDiffThreshold = 2

// supervisorRetryBudget is the internal, fixed number of additional supervisor
Expand Down Expand Up @@ -208,6 +273,14 @@ func runOneSupervisorAttempt(ctx context.Context, req supervisorAttemptRequest)
}
outcome, err := runExecutorAttempt(ctx, req.Opts, effectiveTask, req.Profiles, req.Prepared.CWD, req.Prepared.BaseSHA, attemptDir, req.Prompt, effectiveTaskPath, preflightOutputs)
if err != nil {
// A review-time staging failure is recorded under a distinct phase
// and kind so the failed task surfaces the staging-related error to
// the supervisor and operators instead of mis-classifying it as an
// executor failure (AC6).
if _, ok := asReviewStagingError(err); ok {
appendFailureAttempt(req.Loaded, "review_staging", "review_staging_failed", err, attemptDir)
return attemptReview{}, err
}
appendFailureAttempt(req.Loaded, "executor", classifyFailureKind("executor_failed", err), err, attemptDir)
return attemptReview{}, err
}
Expand Down Expand Up @@ -550,11 +623,41 @@ func runExecutorAttempt(ctx context.Context, opts Options, loaded task.Task, pro
}
}

// Stage executor-produced worktree changes before capturing the snapshot
// Galley hands to the supervisor. Without this step, newly-created
// untracked files would not appear in the staged or unstaged diff surfaces
// and the supervisor would receive an empty diff for new-file work (D1 /
// AC1 / AC2). Non-committed task input file destinations are excluded so
// the staged review evidence is constrained to executor-produced changes
// and context-only inputs do not leak into the supervisor diff (AC4 /
// supervisor feedback on attempt 3). Staging failure is fatal: we surface
// a typed error so the caller records a `review_staging` attempt failure
// instead of sending an empty diff to the supervisor (AC6). The parent
// ctx (not attemptCtx) is used here so a staging step initiated after
// executor timeout still has a chance to capture worktree state and write
// its evidence file.
excludePaths := nonCommittedInputDestinations(loaded.Files)
if err := stageExecutorOutput(ctx, opts, workDir, attemptDir, excludePaths); err != nil {
return attemptOutcome{}, &reviewStagingError{Err: err}
}

diffSnapshot, diffErr := workspace.CaptureSnapshotFromBase(ctx, workDir, baseSHA, workspaceOptions(opts))
diffDirty := false
diffText := ""
if diffErr == nil {
diffDirty = diffSnapshot.Dirty
// Compute the supervisor-facing dirty signal from the submitted
// artifact set (branch diff + staged diff + unstaged diff), not from
// the raw worktree status. After review-time staging, executor-
// produced changes are reflected in StagedDiff while untracked
// entries that remain in StatusPorcelain are context-only Galley
// material (task.files declared commit:false, or other Galley-owned
// runtime context). Sourcing DiffDirty from the diff fields keeps
// the supervisor's progress / "has work to review" gate aligned
// with the submitted artifact instead of being widened by
// context-only worktree dirtiness (supervisor feedback on review-
// time scope). The raw worktree status is still persisted in
// git_status.json as diagnostic evidence.
diffDirty = diffSnapshot.BranchDiff != "" || diffSnapshot.StagedDiff != "" || diffSnapshot.UnstagedDiff != ""
diffText = diffSnapshot.Diff
if err := writeJSON(filepath.Join(attemptDir, "git_status.json"), diffSnapshot); err != nil {
return attemptOutcome{}, err
Expand Down
185 changes: 185 additions & 0 deletions internal/daemon/path_set.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
// Package daemon — review-staging path set builder.
//
// This file owns the explicit "reviewable path set" Galley computes after an
// executor attempt and before it captures the snapshot it hands to the
// supervisor. The path set is the daemon-side representation of the
// submitted artifact set: only the worktree paths that belong to the
// executor-produced diff are staged for review. Context-only Galley material
// (task.files entries declared with commit:false, and any other untracked
// content that is not part of the executor's submitted change set) is
// intentionally kept out of the path set so the supervisor diff/evidence and
// downstream progress signals reflect only what the executor actually
// submitted (D1 / AC1 / AC2 / AC4 / supervisor feedback on review-time
// scope).
//
// Forbidden-path entries are intentionally kept in the path set so the
// existing finalize-time forbidden_paths gate still observes them; review
// staging only excludes context-only Galley material, not safety-gated
// paths.
package daemon

import (
"path/filepath"
"strings"

"github.com/shinpr/galley/internal/task"
)

// reviewablePathsFromStatus extracts the worktree-relative paths Galley
// should stage for supervisor review from the raw output of
// `git status --porcelain=v1 -z` captured after the executor attempt.
//
// Each NUL-terminated entry is parsed into a single reviewable path (the new
// path for renames/copies, the only path otherwise), normalized to slash
// form, deduplicated, and filtered. The function drops:
//
// - empty entries and entries that normalize to ".";
// - non-local entries (filepath.IsLocal rejects absolute paths,
// drive-letter paths, and any segment that backs out of cwd so the
// review-staging set cannot widen beyond the executor's working
// directory regardless of how git status formatted the entry);
// - destinations in excludeDestinations — these are task.files entries
// declared with commit:false, which Galley materializes as context-only
// inputs the executor reads but does not submit.
//
// The order of returned paths follows the first occurrence in the porcelain
// stream so the staging command argv is deterministic across runs of the
// same input.
func reviewablePathsFromStatus(statusZ string, excludeDestinations []string) []string {
excludes := normalizedPathSet(excludeDestinations)
raw := parseStatusPorcelainZ(statusZ)
seen := make(map[string]bool, len(raw))
var result []string
for _, p := range raw {
clean := normalizeReviewablePath(p)
if clean == "" {
continue
}
// Reject any path that escapes the worktree root. filepath.IsLocal
// returns false for absolute paths, drive-letter paths, and any
// segment that backs out of cwd, so the review-staging set cannot
// widen beyond the executor's working directory regardless of how
// git status formatted the entry.
if !filepath.IsLocal(clean) {
continue
}
if excludes[clean] {
continue
}
if seen[clean] {
continue
}
seen[clean] = true
result = append(result, clean)
}
return result
}

// parseStatusPorcelainZ parses NUL-separated `git status --porcelain=v1 -z`
// output into the ordered list of paths the executor changed. Each entry has
// the byte layout "XY<sp>path<NUL>" for regular changes; rename/copy entries
// (X or Y in {R,C}) append a second "<sp>oldpath<NUL>" token after the new
// path. The parser yields only the new path so the staged review set tracks
// the post-rename surface git presents to a reviewer.
//
// Malformed tails (a header without its NUL-terminated path) cause the
// parser to stop where it can no longer be sure of the next path boundary;
// returning a partial path would risk staging unrelated content.
func parseStatusPorcelainZ(s string) []string {
var paths []string
rest := s
for len(rest) > 0 {
// Each entry begins with two status bytes and a separator before the
// path. Anything shorter is a malformed tail; stop instead of
// returning a partial path.
if len(rest) < 4 {
return paths
}
x := rest[0]
y := rest[1]
// Skip the "XY " prefix. git uses a single space here for
// --porcelain=v1; the parser does not need to tolerate a tab because
// the v1 format documents a literal SP.
rest = rest[3:]
idx := strings.IndexByte(rest, 0)
if idx < 0 {
return paths
}
path := rest[:idx]
rest = rest[idx+1:]
if isRenameOrCopyStatus(x) || isRenameOrCopyStatus(y) {
// Rename/copy adds a second NUL-terminated token for the old
// path; we only care about the new path, so consume and discard
// the old one.
idx2 := strings.IndexByte(rest, 0)
if idx2 < 0 {
if path != "" {
paths = append(paths, path)
}
return paths
}
rest = rest[idx2+1:]
}
if path != "" {
paths = append(paths, path)
}
}
return paths
}

func isRenameOrCopyStatus(c byte) bool {
return c == 'R' || c == 'C'
}

// normalizedPathSet returns the set of worktree-relative paths in `paths`,
// normalized to slash form and with empty / "." entries dropped. The
// returned map is suitable for membership tests against
// normalizeReviewablePath output.
func normalizedPathSet(paths []string) map[string]bool {
set := make(map[string]bool, len(paths))
for _, p := range paths {
clean := normalizeReviewablePath(p)
if clean == "" {
continue
}
set[clean] = true
}
return set
}

// normalizeReviewablePath produces the canonical slash-form representation
// of a worktree-relative path used by the review-staging set. Returns ""
// when the input collapses to an empty path or to "." so callers can use
// the empty string as a sentinel for "drop this entry".
func normalizeReviewablePath(p string) string {
clean := strings.TrimSpace(p)
if clean == "" {
return ""
}
clean = filepath.ToSlash(filepath.Clean(clean))
if clean == "" || clean == "." {
return ""
}
return clean
}

// nonCommittedInputDestinations returns the worktree-relative destinations
// of task input files declared with commit:false. These are context-only
// inputs Galley materializes in the worktree before the executor runs;
// review-time staging must keep them out of the supervisor diff so
// reviewable evidence only reflects executor-produced changes (AC4 /
// supervisor feedback on review-time scope).
func nonCommittedInputDestinations(files []task.InputFile) []string {
var paths []string
for _, f := range files {
if f.Commit {
continue
}
dest := strings.TrimSpace(f.Destination)
if dest == "" {
continue
}
paths = append(paths, dest)
}
return paths
}
Loading
Loading