shinpr · shinpr · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json
@@ -9,7 +9,7 @@
       "name": "galley",
       "source": "./plugins/galley",
       "description": "Create, validate, queue, set up, and troubleshoot Galley AFK task workflows.",
-      "version": "0.1.12",
+      "version": "0.1.13",
       "author": {
         "name": "Shinsuke Kagawa",
         "url": "https://github.com/shinpr"

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -64,7 +64,7 @@ jobs:
         shell: powershell
         run: |
           $binDir = Join-Path $env:RUNNER_TEMP "galley-ps-bin"
-          powershell -NoProfile -ExecutionPolicy Bypass -File scripts/install.ps1 -BinDir $binDir
+          powershell -NoProfile -ExecutionPolicy Bypass -File scripts/install.ps1 -Local -BinDir $binDir
           & (Join-Path $binDir "galley.exe") --help | Out-Null
 
       - name: Validate examples

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,15 @@ This project follows semantic versioning.
 
 ## Unreleased
 
+### Added
+
+- Setup executor preflight phase and `environment.yaml` `setup.commands[]`. Galley now prepares fresh task worktrees before acceptance skeleton creation and implementation by running authored setup commands or dispatching a setup executor to learn a reusable setup plan, which is persisted back to `environment.yaml` on success.
+- Setup run evidence and failure routing. Setup now writes `setup_result.json` and, when a learned plan is persisted, `environment_update.json`; setup failures are classified as `phase: setup`, `kind: setup_failed` with repair guidance for `environment.setup`.
+
+### Changed
+
+- Packaged Claude and Codex Galley plugins are now versioned as `0.1.13`. Setup executor prompts now include explicit result JSON contracts, and troubleshooting guidance routes `setup_failed` diagnosis through setup run evidence.
+
 ## v0.6.2 - 2026-05-25
 
 ### Changed

diff --git a/docs/profiles.md b/docs/profiles.md
@@ -124,6 +124,7 @@ Supported fields:
 - `pr.comments.enabled`: poll PR comments and accept any comment whose trimmed body starts with `/galley`. The free-form prefix `/galley <request>` is treated as the request, and the aliases `/galley rerun ...` and `/galley requeue ...` remain backward compatible. Mid-line mentions or `/galley` lines that are not the first non-whitespace token of the comment are ignored. Trust boundary: a `/galley` command is accepted only when the comment author's login matches the PR author login recorded on the task (`pr.author_login`, persisted at PR creation time). Comments that fail this check are marked processed without requeueing; when `pr.comments.reply` is enabled, Galley posts a concise rejection reply. Task files without a recorded `pr.author_login` (older runs) fail closed.
 - `pr.comments.reply`: post a concise acknowledgement after handling a Galley PR comment. Replies do not echo the user-supplied request body; the parsed request text is preserved on the requeued task as a `RevisionRequest` so the executor still receives the user's intent.
 - `worktree.cleanup`: remove managed task worktrees for closed or merged PR tasks, including uncommitted or generated files left in those worktrees.
+- `setup.commands[]`: optional ordered list of commands Galley runs as the setup phase before the acceptance skeleton preflight and before the implementation executor. Each entry has a `run` shell command and an optional human-readable `why`. When `setup` is present, Galley runs the listed commands inside the prepared worktree and, on success, verifies readiness by executing one representative `quality.required_checks` command before declaring the worktree ready. When all authored commands and the readiness check succeed, Galley proceeds to the acceptance skeleton preflight and the implementation executor without dispatching the setup executor. When any authored command or the readiness check fails, Galley falls back to the setup executor (Claude or Codex, per task `executor.cli`) to discover a working plan; on success the learned plan is atomically written back to the environment profile. When `setup` is absent, the setup executor discovers and persists a plan to the environment profile so subsequent tasks reuse it without rediscovery. The setup phase writes `runs/<run-id>/setup_result.json` (attempted commands, readiness evidence, source, repair guidance) and, when a learned plan is persisted, `runs/<run-id>/environment_update.json` (profile rewrite audit record). See `examples/environment-local.yaml` for a worked example.
 
 Validate an environment profile:
 

diff --git a/examples/environment-local.yaml b/examples/environment-local.yaml
@@ -23,3 +23,13 @@ pr:
     reply: true
 worktree:
   cleanup: true
+# setup defines how Galley prepares a fresh task worktree before the
+# implementation executor begins. When this block is absent, Galley dispatches
+# a setup executor that discovers a working plan and writes it back here so
+# subsequent tasks reuse the learned setup without rediscovery.
+setup:
+  commands:
+    - run: "go mod download"
+      why: "fetch Go module cache so build/test commands run hermetically"
+    - run: "go build ./..."
+      why: "fail fast if the workspace cannot compile before the executor runs"
diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go
@@ -470,7 +470,7 @@ func processClaimedTask(ctx, shutdownCtx context.Context, opts Options, runningP
 	// start-point ref to the brand-new task branch instead of inheriting
 	// the source repository's current HEAD. The resolved bundle is threaded
 	// into runSupervisorLoop so the supervisor loop never re-loads it.
-	profiles, err := loadAndPersistTaskProfiles(opts, &loaded, runDir)
+	profiles, resolvedProfiles, err := loadAndPersistTaskProfiles(opts, &loaded, runDir)
 	if err != nil {
 		appendFailureAttempt(&loaded, "run_evidence", "run_evidence_failed", err, runDir)
 		return taskstate.FailMove(opts.Root, runningPath, &loaded, err)
@@ -480,6 +480,37 @@ func processClaimedTask(ctx, shutdownCtx context.Context, opts Options, runningP
 	if err != nil {
 		return taskstate.FailMove(opts.Root, runningPath, &loaded, err)
 	}
+	// Setup executor preflight runs after the worktree and input files are
+	// prepared, before acceptance skeleton preflight, and before any executor
+	// attempt (AC2, AC10). When environment.setup is present the daemon runs
+	// that authored plan directly; when absent the setup executor (Claude or
+	// Codex per task.executor.cli) attempts to make the worktree ready and may
+	// return a learned plan that Galley persists back to environment.yaml
+	// (AC3, AC4, AC6, AC7). Setup readiness excludes acceptance skeleton
+	// obligations (AC10).
+	setupRes, setupUpdate, setupErr := SetupExecutorPreflight(ctx, SetupExecutorPreflightOptions{
+		Task:                   loaded,
+		WorkDir:                prepared.CWD,
+		RunDir:                 runDir,
+		Profiles:               profiles,
+		ClaudeBin:              opts.ClaudeBin,
+		CodexBin:               opts.CodexBin,
+		EnvironmentProfilePath: resolvedProfiles.EnvironmentProfileFile,
+	})
+	if setupErr != nil {
+		appendFailureAttempt(&loaded, SetupPhase, SetupFailedKind, setupErr, runDir)
+		return taskstate.FailMove(opts.Root, runningPath, &loaded, setupErr)
+	}
+	// Apply setup readiness evidence (and any persisted profile change) to the
+	// running task before the implementation work order is built so the
+	// supervisor and executor share the same readiness facts (AC8).
+	applySetupResultToTask(&loaded, setupRes, setupUpdate)
+	if setupRes != nil {
+		if err := task.Save(runningPath, loaded); err != nil {
+			appendFailureAttempt(&loaded, SetupPhase, SetupFailedKind, err, runDir)
+			return taskstate.FailMove(opts.Root, runningPath, &loaded, err)
+		}
+	}
 	// Optional acceptance skeleton preflight runs after inputfiles.Prepare and
 	// before the first executor attempt. The stage is a no-op when the task
 	// omits preflight.acceptance_skeleton.enabled or sets it to false (R1,
@@ -518,18 +549,79 @@ func processClaimedTask(ctx, shutdownCtx context.Context, opts Options, runningP
 // run directory. The same shape that loop.go's loadSupervisorProfiles wrote
 // previously is preserved so existing readers of profiles.json continue to
 // work.
-func loadAndPersistTaskProfiles(opts Options, loaded *task.Task, runDir string) (profile.Bundle, error) {
+func loadAndPersistTaskProfiles(opts Options, loaded *task.Task, runDir string) (profile.Bundle, resolvedProfileFiles, error) {
 	resolved, profiles, err := loadTaskProfiles(opts, loaded.Scope.CWD)
 	if err != nil {
-		return profile.Bundle{}, err
+		return profile.Bundle{}, resolvedProfileFiles{}, err
 	}
 	if err := writeJSON(filepath.Join(runDir, "profiles.json"), struct {
 		Resolved resolvedProfileFiles `json:"resolved"`
 		Bundle   profile.Bundle       `json:"bundle"`
 	}{Resolved: resolved, Bundle: profiles}); err != nil {
-		return profile.Bundle{}, err
+		return profile.Bundle{}, resolvedProfileFiles{}, err
+	}
+	return profiles, resolved, nil
+}
+
+// applySetupResultToTask records the setup outcome on the running task. It is
+// a no-op when loaded or res is nil. Otherwise it appends one entry to
+// loaded.Verification.Commands whose excerpt carries the setup status, command
+// count, readiness evidence (when non-empty), and source (AC8). The excerpt
+// also names the rewritten profile when update.Changed is true, or notes
+// "environment.yaml=unchanged" when setup is ready without a profile change.
+// When update.Changed is true a "technical_debt" Risk is appended as well so
+// PR/task output reflects the environment.yaml rewrite.
+func applySetupResultToTask(loaded *task.Task, res *SetupResult, update *SetupEnvironmentUpdate) {
+	if loaded == nil || res == nil {
+		return
+	}
+	note := fmt.Sprintf("setup status=%s commands=%d", res.Status, len(res.Commands))
+	if res.ReadinessEvidence != "" {
+		note = note + " — " + res.ReadinessEvidence
+	}
+	// AC8: persist setup evidence in task.verification.commands so it shows up
+	// in the task verification history and the rendered PR/task output. The
+	// command label is a stable pseudo-command operators can recognize even
+	// without inspecting the run directory (it embeds res.Provider when set),
+	// and the excerpt names the setup source so readers can tell authored vs
+	// learned without opening setup_result.json.
+	setupCmd := "<galley:setup>"
+	if res.Provider != "" {
+		setupCmd = fmt.Sprintf("<galley:setup:%s>", res.Provider)
+	}
+	excerpt := note + fmt.Sprintf(" source=%s", res.Source)
+	if update != nil && update.Changed {
+		excerpt = excerpt + fmt.Sprintf(" environment.yaml=%s (%s)", update.ProfilePath, update.Reason)
+	} else if res.Status == SetupStatusReady {
+		excerpt = excerpt + " environment.yaml=unchanged"
+	}
+	loaded.Verification.Commands = append(loaded.Verification.Commands, task.VerificationCommand{
+		Cmd:           setupCmd,
+		Status:        setupVerificationStatus(res.Status),
+		OutputExcerpt: excerpt,
+	})
+	if update != nil && update.Changed {
+		// Surface profile changes as a Risk-style entry so task/PR output
+		// records that environment.yaml setup was rewritten.
+		loaded.Risks = append(loaded.Risks, task.Risk{
+			ID:     fmt.Sprintf("setup-profile-updated-%d", len(loaded.Risks)+1),
+			Type:   "technical_debt",
+			Detail: fmt.Sprintf("Setup executor persisted a learned plan to %s (%s). %s", update.ProfilePath, update.Reason, note),
+		})
+	}
+}
+
+// setupVerificationStatus maps a SetupResult.Status to a VerificationCommand
+// status: SetupStatusReady -> "passed", SetupStatusFailed -> "failed", else "skipped".
+func setupVerificationStatus(s string) string {
+	switch s {
+	case SetupStatusReady:
+		return "passed"
+	case SetupStatusFailed:
+		return "failed"
+	default:
+		return "skipped"
 	}
-	return profiles, nil
 }
 
 func loadClaimedTask(runningPath string) (task.Task, error) {

diff --git a/internal/daemon/loop.go b/internal/daemon/loop.go
@@ -71,6 +71,11 @@ func runSupervisorLoop(ctx, shutdownCtx context.Context, opts Options, runningPa
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "galley: could not load preflight result for run %s: %v\n", runID, err)
 	}
+	// Setup result is loaded from runs/<run-id>/setup_result.json which the
+	// setup executor preflight wrote before this loop. It is appended to the
+	// implementation work order so the executor sees the readiness facts and
+	// threaded into supervisor evidence so reviewers can verify them (AC8).
+	setupResultEvidence, setupUpdateEvidence := loadSetupRunEvidence(runDir, runID)
 	promptTask := executionTask(*loaded, prepared.CWD)
 	if preflightResult != nil {
 		// Runtime obligations below are the source of truth after preflight.
@@ -89,6 +94,9 @@ func runSupervisorLoop(ctx, shutdownCtx context.Context, opts Options, runningPa
 	if preflightResult != nil {
 		prompt = appendPreflightObligations(prompt, preflightResult)
 	}
+	if setupResultEvidence != nil {
+		prompt = appendSetupReadinessObligations(prompt, setupResultEvidence, setupUpdateEvidence)
+	}
 	budget := attemptBudget(loaded.ExecutionPolicy.LoopBudget)
 	consecutiveNoDiff := 0
 	for attempt := 1; budget < 0 || attempt <= budget; attempt++ {
@@ -203,18 +211,21 @@ func runOneSupervisorAttempt(ctx context.Context, req supervisorAttemptRequest)
 		appendFailureAttempt(req.Loaded, "executor", classifyFailureKind("executor_failed", err), err, attemptDir)
 		return attemptReview{}, err
 	}
+	setupResultEvidence, setupUpdateEvidence := loadSetupRunEvidence(req.RunDir, req.RunID)
 	evidence := supervisor.Evidence{
-		Task:            *req.Loaded,
-		Profiles:        req.Profiles,
-		Claude:          outcome.ClaudeResult,
-		ParseError:      outcome.ParseErr,
-		RunError:        outcome.RunErr,
-		DiffDirty:       outcome.DiffDirty,
-		Diff:            outcome.Diff,
-		DiffError:       outcome.DiffErr,
-		Attempt:         req.Attempt,
-		AttemptsLeft:    attemptsLeft(req.Budget, req.Attempt),
-		PreflightResult: preflightOutputs,
+		Task:                   *req.Loaded,
+		Profiles:               req.Profiles,
+		Claude:                 outcome.ClaudeResult,
+		ParseError:             outcome.ParseErr,
+		RunError:               outcome.RunErr,
+		DiffDirty:              outcome.DiffDirty,
+		Diff:                   outcome.Diff,
+		DiffError:              outcome.DiffErr,
+		Attempt:                req.Attempt,
+		AttemptsLeft:           attemptsLeft(req.Budget, req.Attempt),
+		PreflightResult:        preflightOutputs,
+		SetupResult:            setupResultEvidence,
+		SetupEnvironmentUpdate: setupUpdateEvidence,
 	}
 	verdict, err := evaluateSupervisorWithRetry(ctx, req.Opts, evidence, attemptDir, req.Prepared.CWD)
 	if err != nil {