From 89cc4e38d456573d180cd32c3dd82a924f72ce77 Mon Sep 17 00:00:00 2001 From: Shinsuke Kagawa Date: Mon, 25 May 2026 20:22:21 +0900 Subject: [PATCH 1/6] galley: task-20260525094613-f0e611-add-setup-executor-preflight-and-learned-environment-setup --- CHANGELOG.md | 7 + docs/profiles.md | 1 + examples/environment-local.yaml | 10 + internal/daemon/daemon.go | 102 +- internal/daemon/loop.go | 33 +- internal/daemon/setup_executor_preflight.go | 914 +++++++++++++++++ .../daemon/setup_executor_preflight_test.go | 917 ++++++++++++++++++ internal/daemon/testmain_test.go | 43 +- internal/profile/profile.go | 171 ++++ internal/profile/schema.go | 12 + internal/supervisor/adapter.go | 54 +- internal/supervisor/supervisor.go | 6 + .../galley/references/environment.schema.json | 30 + .../galley/references/troubleshooting.md | 19 + prompts/embed.go | 24 + prompts/setup-executor-codex.md | 67 ++ prompts/setup-executor.md | 73 ++ schemas/embed.go | 3 + schemas/setup-result.schema.json | 75 ++ 19 files changed, 2518 insertions(+), 43 deletions(-) create mode 100644 internal/daemon/setup_executor_preflight.go create mode 100644 internal/daemon/setup_executor_preflight_test.go create mode 100644 prompts/setup-executor-codex.md create mode 100644 prompts/setup-executor.md create mode 100644 schemas/setup-result.schema.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 60f8c5e..34ef924 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ This project follows semantic versioning. ## Unreleased +### Added + +- Setup executor preflight phase. After the worktree and input files are prepared and before the acceptance skeleton preflight and implementation executor, Galley runs a setup phase that either executes the operator-authored `environment.setup.commands` or, when absent, dispatches a setup executor (Claude or Codex per task `executor.cli`) to discover and validate a working setup plan. On success the learned plan is atomically written back to `environment.yaml` so subsequent tasks reuse it without rediscovery. +- `environment.yaml` now accepts a top-level `setup` block whose `commands[]` list is ordered shell commands with optional `why` annotations. The bundled environment schema requires `commands` to be non-empty and matches `ValidateEnvironment`. +- New run evidence artifacts: `runs//setup_result.json` records attempted commands, command source (`environment_setup`, `environment_commands`, `readiness_check`, or `discovered`), inspected files, readiness evidence, and repair guidance; `runs//environment_update.json` records the before/after diff and rewrite reason when Galley persists a learned plan. +- Setup failure classification. A failed setup phase records `phase: setup`, `kind: setup_failed` on the task and surfaces attempted commands, inspected files, stdout/stderr excerpts, and repair guidance in `setup_result.json` so operators can repair `environment.setup` and requeue. + ## v0.6.2 - 2026-05-25 ### Changed diff --git a/docs/profiles.md b/docs/profiles.md index faf75f9..188b467 100644 --- a/docs/profiles.md +++ b/docs/profiles.md @@ -124,6 +124,7 @@ Supported fields: - `pr.comments.enabled`: poll PR comments and accept any comment whose trimmed body starts with `/galley`. The free-form prefix `/galley ` is treated as the request, and the aliases `/galley rerun ...` and `/galley requeue ...` remain backward compatible. Mid-line mentions or `/galley` lines that are not the first non-whitespace token of the comment are ignored. Trust boundary: a `/galley` command is accepted only when the comment author's login matches the PR author login recorded on the task (`pr.author_login`, persisted at PR creation time). Comments that fail this check are marked processed without requeueing; when `pr.comments.reply` is enabled, Galley posts a concise rejection reply. Task files without a recorded `pr.author_login` (older runs) fail closed. - `pr.comments.reply`: post a concise acknowledgement after handling a Galley PR comment. Replies do not echo the user-supplied request body; the parsed request text is preserved on the requeued task as a `RevisionRequest` so the executor still receives the user's intent. - `worktree.cleanup`: remove managed task worktrees for closed or merged PR tasks, including uncommitted or generated files left in those worktrees. +- `setup.commands[]`: optional ordered list of commands Galley runs as the setup phase before the acceptance skeleton preflight and before the implementation executor. Each entry has a `run` shell command and an optional human-readable `why`. When `setup` is present, Galley runs the listed commands inside the prepared worktree and, on success, verifies readiness by executing one representative `quality.required_checks` command before declaring the worktree ready. When all authored commands and the readiness check succeed, Galley proceeds to the executor. When any authored command or the readiness check fails, Galley falls back to the setup executor (Claude or Codex, per task `executor.cli`) to discover a working plan; on success the learned plan is atomically written back to this file. When `setup` is absent, the setup executor discovers and persists a plan here so subsequent tasks reuse it without rediscovery. The setup phase writes `runs//setup_result.json` (attempted commands, readiness evidence, source, repair guidance) and, when a learned plan is persisted, `runs//environment_update.json` (profile rewrite audit record). See `examples/environment-local.yaml` for a worked example. Validate an environment profile: diff --git a/examples/environment-local.yaml b/examples/environment-local.yaml index a0a5b51..108482d 100644 --- a/examples/environment-local.yaml +++ b/examples/environment-local.yaml @@ -23,3 +23,13 @@ pr: reply: true worktree: cleanup: true +# setup defines how Galley prepares a fresh task worktree before the +# implementation executor begins. When this block is absent, Galley dispatches +# a setup executor that discovers a working plan and writes it back here so +# subsequent tasks reuse the learned setup without rediscovery. +setup: + commands: + - run: "go mod download" + why: "fetch Go module cache so build/test commands run hermetically" + - run: "go build ./..." + why: "fail fast if the workspace cannot compile before the executor runs" diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 2991b42..3a216a2 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -470,7 +470,7 @@ func processClaimedTask(ctx, shutdownCtx context.Context, opts Options, runningP // start-point ref to the brand-new task branch instead of inheriting // the source repository's current HEAD. The resolved bundle is threaded // into runSupervisorLoop so the supervisor loop never re-loads it. - profiles, err := loadAndPersistTaskProfiles(opts, &loaded, runDir) + profiles, resolvedProfiles, err := loadAndPersistTaskProfiles(opts, &loaded, runDir) if err != nil { appendFailureAttempt(&loaded, "run_evidence", "run_evidence_failed", err, runDir) return taskstate.FailMove(opts.Root, runningPath, &loaded, err) @@ -480,6 +480,37 @@ func processClaimedTask(ctx, shutdownCtx context.Context, opts Options, runningP if err != nil { return taskstate.FailMove(opts.Root, runningPath, &loaded, err) } + // Setup executor preflight runs after the worktree and input files are + // prepared, before acceptance skeleton preflight, and before any executor + // attempt (AC2, AC10). When environment.setup is present the daemon runs + // that authored plan directly; when absent the setup executor (Claude or + // Codex per task.executor.cli) attempts to make the worktree ready and may + // return a learned plan that Galley persists back to environment.yaml + // (AC3, AC4, AC6, AC7). Setup readiness excludes acceptance skeleton + // obligations (AC10). + setupRes, setupUpdate, setupErr := SetupExecutorPreflight(ctx, SetupExecutorPreflightOptions{ + Task: loaded, + WorkDir: prepared.CWD, + RunDir: runDir, + Profiles: profiles, + ClaudeBin: opts.ClaudeBin, + CodexBin: opts.CodexBin, + EnvironmentProfilePath: resolvedProfiles.EnvironmentProfileFile, + }) + if setupErr != nil { + appendFailureAttempt(&loaded, SetupPhase, SetupFailedKind, setupErr, runDir) + return taskstate.FailMove(opts.Root, runningPath, &loaded, setupErr) + } + // Apply setup readiness evidence (and any persisted profile change) to the + // running task before the implementation work order is built so the + // supervisor and executor share the same readiness facts (AC8). + applySetupResultToTask(&loaded, setupRes, setupUpdate) + if setupRes != nil { + if err := task.Save(runningPath, loaded); err != nil { + appendFailureAttempt(&loaded, SetupPhase, SetupFailedKind, err, runDir) + return taskstate.FailMove(opts.Root, runningPath, &loaded, err) + } + } // Optional acceptance skeleton preflight runs after inputfiles.Prepare and // before the first executor attempt. The stage is a no-op when the task // omits preflight.acceptance_skeleton.enabled or sets it to false (R1, @@ -518,18 +549,79 @@ func processClaimedTask(ctx, shutdownCtx context.Context, opts Options, runningP // run directory. The same shape that loop.go's loadSupervisorProfiles wrote // previously is preserved so existing readers of profiles.json continue to // work. -func loadAndPersistTaskProfiles(opts Options, loaded *task.Task, runDir string) (profile.Bundle, error) { +func loadAndPersistTaskProfiles(opts Options, loaded *task.Task, runDir string) (profile.Bundle, resolvedProfileFiles, error) { resolved, profiles, err := loadTaskProfiles(opts, loaded.Scope.CWD) if err != nil { - return profile.Bundle{}, err + return profile.Bundle{}, resolvedProfileFiles{}, err } if err := writeJSON(filepath.Join(runDir, "profiles.json"), struct { Resolved resolvedProfileFiles `json:"resolved"` Bundle profile.Bundle `json:"bundle"` }{Resolved: resolved, Bundle: profiles}); err != nil { - return profile.Bundle{}, err + return profile.Bundle{}, resolvedProfileFiles{}, err + } + return profiles, resolved, nil +} + +// applySetupResultToTask records setup readiness evidence on the running task +// so the implementation work order and supervisor evidence carry the same +// facts. The setup outcome is also appended to task.verification.commands so +// the task verification history and rendered PR/task output always include the +// setup readiness fact (AC8) — including the unchanged-setup case, where no +// environment.yaml change is recorded. When a learned plan was persisted to +// environment.yaml the change is additionally surfaced as a Risk-style note so +// PR/task output reflects the profile update. +func applySetupResultToTask(loaded *task.Task, res *SetupResult, update *SetupEnvironmentUpdate) { + if loaded == nil || res == nil { + return + } + note := fmt.Sprintf("setup status=%s commands=%d", res.Status, len(res.Commands)) + if res.ReadinessEvidence != "" { + note = note + " — " + res.ReadinessEvidence + } + // AC8: persist setup evidence in task.verification.commands so it shows up + // in the task verification history and the rendered PR/task output. The + // command label is a stable pseudo-command operators can recognize even + // without inspecting the run directory, and the excerpt names the setup + // source so readers can tell authored vs learned without opening + // setup_result.json. + setupCmd := "" + if res.Provider != "" { + setupCmd = fmt.Sprintf("", res.Provider) + } + excerpt := note + fmt.Sprintf(" source=%s", res.Source) + if update != nil && update.Changed { + excerpt = excerpt + fmt.Sprintf(" environment.yaml=%s (%s)", update.ProfilePath, update.Reason) + } else if res.Status == SetupStatusReady { + excerpt = excerpt + " environment.yaml=unchanged" + } + loaded.Verification.Commands = append(loaded.Verification.Commands, task.VerificationCommand{ + Cmd: setupCmd, + Status: setupVerificationStatus(res.Status), + OutputExcerpt: excerpt, + }) + if update != nil && update.Changed { + // Surface profile changes as a Risk-style entry so task/PR output + // records that environment.yaml setup was rewritten. + loaded.Risks = append(loaded.Risks, task.Risk{ + ID: fmt.Sprintf("setup-profile-updated-%d", len(loaded.Risks)+1), + Type: "technical_debt", + Detail: fmt.Sprintf("Setup executor persisted a learned plan to %s (%s). %s", update.ProfilePath, update.Reason, note), + }) + } +} + +// setupVerificationStatus maps SetupResult.Status to the canonical +// VerificationCommand status vocabulary used by task verification history. +func setupVerificationStatus(s string) string { + switch s { + case SetupStatusReady: + return "passed" + case SetupStatusFailed: + return "failed" + default: + return "skipped" } - return profiles, nil } func loadClaimedTask(runningPath string) (task.Task, error) { diff --git a/internal/daemon/loop.go b/internal/daemon/loop.go index 6593b76..a5d57e7 100644 --- a/internal/daemon/loop.go +++ b/internal/daemon/loop.go @@ -71,6 +71,11 @@ func runSupervisorLoop(ctx, shutdownCtx context.Context, opts Options, runningPa if err != nil { fmt.Fprintf(os.Stderr, "galley: could not load preflight result for run %s: %v\n", runID, err) } + // Setup result is loaded from runs//setup_result.json which the + // setup executor preflight wrote before this loop. It is appended to the + // implementation work order so the executor sees the readiness facts and + // threaded into supervisor evidence so reviewers can verify them (AC8). + setupResultEvidence, setupUpdateEvidence := loadSetupRunEvidence(runDir, runID) promptTask := executionTask(*loaded, prepared.CWD) if preflightResult != nil { // Runtime obligations below are the source of truth after preflight. @@ -89,6 +94,9 @@ func runSupervisorLoop(ctx, shutdownCtx context.Context, opts Options, runningPa if preflightResult != nil { prompt = appendPreflightObligations(prompt, preflightResult) } + if setupResultEvidence != nil { + prompt = appendSetupReadinessObligations(prompt, setupResultEvidence, setupUpdateEvidence) + } budget := attemptBudget(loaded.ExecutionPolicy.LoopBudget) consecutiveNoDiff := 0 for attempt := 1; budget < 0 || attempt <= budget; attempt++ { @@ -203,18 +211,21 @@ func runOneSupervisorAttempt(ctx context.Context, req supervisorAttemptRequest) appendFailureAttempt(req.Loaded, "executor", classifyFailureKind("executor_failed", err), err, attemptDir) return attemptReview{}, err } + setupResultEvidence, setupUpdateEvidence := loadSetupRunEvidence(req.RunDir, req.RunID) evidence := supervisor.Evidence{ - Task: *req.Loaded, - Profiles: req.Profiles, - Claude: outcome.ClaudeResult, - ParseError: outcome.ParseErr, - RunError: outcome.RunErr, - DiffDirty: outcome.DiffDirty, - Diff: outcome.Diff, - DiffError: outcome.DiffErr, - Attempt: req.Attempt, - AttemptsLeft: attemptsLeft(req.Budget, req.Attempt), - PreflightResult: preflightOutputs, + Task: *req.Loaded, + Profiles: req.Profiles, + Claude: outcome.ClaudeResult, + ParseError: outcome.ParseErr, + RunError: outcome.RunErr, + DiffDirty: outcome.DiffDirty, + Diff: outcome.Diff, + DiffError: outcome.DiffErr, + Attempt: req.Attempt, + AttemptsLeft: attemptsLeft(req.Budget, req.Attempt), + PreflightResult: preflightOutputs, + SetupResult: setupResultEvidence, + SetupEnvironmentUpdate: setupUpdateEvidence, } verdict, err := evaluateSupervisorWithRetry(ctx, req.Opts, evidence, attemptDir, req.Prepared.CWD) if err != nil { diff --git a/internal/daemon/setup_executor_preflight.go b/internal/daemon/setup_executor_preflight.go new file mode 100644 index 0000000..fb65c2f --- /dev/null +++ b/internal/daemon/setup_executor_preflight.go @@ -0,0 +1,914 @@ +// Package daemon — Setup Executor Preflight stage. +// +// SetupExecutorPreflight prepares a fresh task worktree before the acceptance +// skeleton preflight and before the implementation executor. It runs after +// inputfiles.Prepare so the prepared worktree (with input files staged) is the +// state the setup acts on, and before AcceptanceSkeletonPreflight so setup +// readiness is verified independently of any task-specific skeletons (AC2, +// AC10). +// +// When environment.setup is present the stage executes the authored plan +// directly (AC2). When environment.setup is absent the stage dispatches a +// setup executor (Claude or Codex per task.executor.cli) that may discover and +// return a successful setup plan (AC3, AC4). On success the daemon persists +// runs//setup_result.json (AC8) and, when the resolved environment +// profile lacked a setup field or the learned plan differs, atomically rewrites +// the repository environment.yaml setup field (AC7) and records the change in +// runs//environment_update.json so a subsequent task reuses the learned +// setup without rediscovery. +// +// On failure the stage classifies the error with phase "setup" and kind +// "setup_failed" (AC9) and writes the setup_result.json failure record with +// attempted commands, command source, inspected files, stdout/stderr excerpts, +// and repair guidance so the operator can fix environment.setup before the next +// attempt. +package daemon + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/shinpr/galley/internal/profile" + "github.com/shinpr/galley/internal/runner" + claudeguard "github.com/shinpr/galley/internal/runner/claude_guard_plugin" + "github.com/shinpr/galley/internal/task" + "github.com/shinpr/galley/prompts" + "github.com/shinpr/galley/schemas" +) + +// SetupResultStatus enumerates the setup phase outcomes Galley records. +const ( + SetupStatusReady = "ready" + SetupStatusFailed = "failed" +) + +// Setup-phase classification constants used by daemon failure routing (AC9). +const ( + SetupPhase = "setup" + SetupFailedKind = "setup_failed" +) + +// Source values recorded in SetupCommandAttempt.Source so reviewers can +// distinguish authored vs learned commands (AC6). +const ( + SetupSourceEnvironmentSetup = "environment_setup" + SetupSourceEnvironmentCommands = "environment_commands" + SetupSourceDiscovered = "discovered" + SetupSourceReadinessCheck = "readiness_check" +) + +// SetupResult is the runtime source-of-truth output of the setup executor +// preflight. It is serialized to runs//setup_result.json (AC8). +type SetupResult struct { + Status string `json:"status" yaml:"status"` + Commands []SetupCommandAttempt `json:"commands" yaml:"commands"` + SuccessfulCommands []profile.SetupCommand `json:"successful_commands,omitempty" yaml:"successful_commands,omitempty"` + InspectedFiles []string `json:"inspected_files,omitempty" yaml:"inspected_files,omitempty"` + ReadinessEvidence string `json:"readiness_evidence,omitempty" yaml:"readiness_evidence,omitempty"` + RepairGuidance string `json:"repair_guidance,omitempty" yaml:"repair_guidance,omitempty"` + Error string `json:"error,omitempty" yaml:"error,omitempty"` + Provider string `json:"provider,omitempty" yaml:"provider,omitempty"` + Source string `json:"source,omitempty" yaml:"source,omitempty"` +} + +// SetupCommandAttempt is one command Galley executed (either directly from the +// authored plan or via the setup executor's run). Stdout/stderr are truncated +// excerpts; the full subprocess output remains in the run directory log files. +type SetupCommandAttempt struct { + Run string `json:"run" yaml:"run"` + Why string `json:"why,omitempty" yaml:"why,omitempty"` + Source string `json:"source" yaml:"source"` + ExitCode int `json:"exit_code" yaml:"exit_code"` + StdoutExcerpt string `json:"stdout_excerpt,omitempty" yaml:"stdout_excerpt,omitempty"` + StderrExcerpt string `json:"stderr_excerpt,omitempty" yaml:"stderr_excerpt,omitempty"` +} + +// SetupEnvironmentUpdate is persisted to runs//environment_update.json +// when the daemon rewrites the repository environment.yaml setup field (AC7, +// AC8). +type SetupEnvironmentUpdate struct { + ProfilePath string `json:"profile_path"` + Changed bool `json:"changed"` + Before *profile.SetupPlan `json:"before,omitempty"` + After profile.SetupPlan `json:"after"` + Reason string `json:"reason"` + UpdatedAt string `json:"updated_at"` +} + +// SetupExecutorPreflightOptions configures one preflight invocation. The +// EnvironmentProfilePath is required when the daemon should persist a learned +// setup plan back to the repository environment profile (AC7). +type SetupExecutorPreflightOptions struct { + Task task.Task + WorkDir string + RunDir string + Profiles profile.Bundle + ClaudeBin string + CodexBin string + EnvironmentProfilePath string + // RepositorySignals is an optional list of repository setup signal paths + // (manifests, lockfiles, setup docs) that Galley surfaces to the setup + // executor. The daemon discovers these from the worktree before invoking the + // executor so the prompt context is grounded in real repository evidence. + RepositorySignals []string +} + +// setupExecutorRunner is the package-level dispatch hook for the setup +// executor. Tests override it to drive the preflight without spawning a real +// claude or codex subprocess. The default runs the real provider via +// runSetupExecutor. +var setupExecutorRunner = runSetupExecutor + +// SetupExecutorPreflight orchestrates the setup phase. It returns the result +// (nil when no setup work was needed and the daemon should skip the stage), +// an optional environment update record, and an error. The error is non-nil +// only on hard preflight failures (Galley-side I/O, contract violations, a +// setup executor that failed to produce a ready worktree, or a learned-plan +// persistence failure that means AC7 cannot be honored for the next task). +func SetupExecutorPreflight(ctx context.Context, opts SetupExecutorPreflightOptions) (*SetupResult, *SetupEnvironmentUpdate, error) { + if opts.RunDir == "" { + return nil, nil, fmt.Errorf("setup preflight: run dir is required") + } + if opts.WorkDir == "" { + return nil, nil, fmt.Errorf("setup preflight: work dir is required") + } + if opts.Profiles.Environment == nil { + // Without an environment profile there is no setup contract to enforce + // and no commands map to learn from. Skip the stage so existing tasks + // without an environment profile keep the prior daemon behavior. + return nil, nil, nil + } + + env := opts.Profiles.Environment + if env.Setup != nil && len(env.Setup.Commands) > 0 { + res, authoredErr := runAuthoredSetupPlan(ctx, opts, env.Setup) + if authoredErr == nil { + // Authored commands all exited 0. Run a representative quality + // required check to prove the worktree is actually ready before + // declaring success (AC2 readiness verification). + runAuthoredReadinessCheck(ctx, opts, res) + } + if res != nil && res.Status == SetupStatusReady { + if err := WriteSetupResult(opts.RunDir, res); err != nil { + return res, nil, err + } + return res, nil, nil + } + // Authored plan failed (either a setup command or the readiness check). + // Per AC6 fall back to the setup executor (discovery) so a stale or + // incorrect authored plan can be repaired by learning a working one. + discovered, derr := setupExecutorRunner(ctx, opts) + if discovered != nil { + res = mergeAuthoredAttemptsIntoDiscovery(res, discovered) + } + if writeErr := WriteSetupResult(opts.RunDir, res); writeErr != nil && derr == nil { + derr = writeErr + } + if derr != nil { + return res, nil, derr + } + if res == nil || res.Status != SetupStatusReady { + return res, nil, fmt.Errorf("setup executor did not make the worktree ready") + } + // Discovery succeeded but produced no successful_commands while the + // authored plan had also failed. Persisting nothing would silently + // leave environment.yaml unchanged with the stale authored plan still + // active — AC7's "no silent unchanged" invariant. Downgrade to failed + // with repair guidance and keep setup_result.json diagnostic. + if vErr := enforceLearnedSetupPlanContract(res, env); vErr != nil { + applySetupContractViolation(res, vErr) + _ = WriteSetupResult(opts.RunDir, res) + return res, nil, fmt.Errorf("setup phase failed: %w", vErr) + } + // Discovery succeeded: persist the learned plan and continue. + update, perr := persistLearnedSetupPlan(opts, env, res) + if perr != nil { + recordSetupProfileUpdateFailure(opts.RunDir, perr) + res.Status = SetupStatusFailed + if res.Error == "" { + res.Error = "learned setup plan persistence failed: " + perr.Error() + } + _ = WriteSetupResult(opts.RunDir, res) + return res, nil, fmt.Errorf("setup phase failed: persist learned setup plan: %w", perr) + } + if update != nil { + if err := WriteSetupEnvironmentUpdate(opts.RunDir, update); err != nil { + return res, nil, err + } + } + return res, update, nil + } + + // No authored plan: dispatch the setup executor. + res, err := setupExecutorRunner(ctx, opts) + if writeErr := WriteSetupResult(opts.RunDir, res); writeErr != nil && err == nil { + err = writeErr + } + if err != nil { + return res, nil, err + } + if res == nil || res.Status != SetupStatusReady { + return res, nil, fmt.Errorf("setup executor did not make the worktree ready") + } + // Discovery reported ready: enforce the learned-plan contract before + // persistence so a ready+empty-successful_commands response cannot silently + // leave environment.yaml unchanged (AC7 invariant). + if vErr := enforceLearnedSetupPlanContract(res, env); vErr != nil { + applySetupContractViolation(res, vErr) + _ = WriteSetupResult(opts.RunDir, res) + return res, nil, fmt.Errorf("setup phase failed: %w", vErr) + } + // On success, persist the learned plan back to the environment profile when + // the resolved profile lacked a setup field (AC7) so subsequent tasks reuse + // it without rediscovery. A failure to persist is treated as a setup-phase + // failure: AC7 explicitly requires that subsequent tasks see the learned + // plan without rediscovery, so silently swallowing the rewrite error would + // re-cost discovery every run. + update, perr := persistLearnedSetupPlan(opts, env, res) + if perr != nil { + recordSetupProfileUpdateFailure(opts.RunDir, perr) + // Promote the persistence failure into the setup result so + // setup_result.json carries the same failure facts as the rest of the + // run evidence (AC8). + if res != nil { + res.Status = SetupStatusFailed + if res.Error == "" { + res.Error = "learned setup plan persistence failed: " + perr.Error() + } + if res.RepairGuidance == "" { + res.RepairGuidance = "Inspect environment_update.json, fix the environment.yaml under runs//setup_result.json, and requeue the task." + } + _ = WriteSetupResult(opts.RunDir, res) + } + return res, nil, fmt.Errorf("setup phase failed: persist learned setup plan: %w", perr) + } + if update != nil { + if err := WriteSetupEnvironmentUpdate(opts.RunDir, update); err != nil { + return res, nil, err + } + } + return res, update, nil +} + +// runAuthoredSetupPlan executes the operator-authored environment.setup +// commands directly. Each command runs through runner.RunCommand inside the +// worktree with stdout/stderr captured to setup_authored.N.{stdout,stderr}.log +// so the operator can inspect a failure even when the daemon reports only the +// terse excerpt in setup_result.json. +func runAuthoredSetupPlan(ctx context.Context, opts SetupExecutorPreflightOptions, plan *profile.SetupPlan) (*SetupResult, error) { + timeout := setupCommandTimeout(opts.Task) + res := &SetupResult{ + Status: SetupStatusReady, + Commands: []SetupCommandAttempt{}, + SuccessfulCommands: []profile.SetupCommand{}, + Source: SetupSourceEnvironmentSetup, + } + for i, cmd := range plan.Commands { + argv := []string{shellExecutable(opts.Profiles), "-c", cmd.Run} + command := runner.Command{Argv: argv, WorkDir: opts.WorkDir} + stdoutPath := filepath.Join(opts.RunDir, fmt.Sprintf("setup_authored.%d.stdout.log", i+1)) + stderrPath := filepath.Join(opts.RunDir, fmt.Sprintf("setup_authored.%d.stderr.log", i+1)) + out, err := runner.RunCommand(ctx, command, runner.RunOptions{ + Timeout: timeout, + StdoutPath: stdoutPath, + StderrPath: stderrPath, + TailBytes: 8192, + }) + attempt := SetupCommandAttempt{ + Run: cmd.Run, + Why: cmd.Why, + Source: SetupSourceEnvironmentSetup, + ExitCode: out.ExitCode, + StdoutExcerpt: truncateExcerpt(out.Stdout), + StderrExcerpt: truncateExcerpt(out.Stderr), + } + res.Commands = append(res.Commands, attempt) + if err != nil { + res.Status = SetupStatusFailed + res.Error = fmt.Sprintf("authored setup command %d failed (exit %d): %v", i+1, out.ExitCode, err) + res.RepairGuidance = "Inspect setup_authored." + fmt.Sprint(i+1) + ".stderr.log, fix the failing command in environment.yaml setup.commands, and requeue." + return res, fmt.Errorf("setup phase failed: %s", res.Error) + } + res.SuccessfulCommands = append(res.SuccessfulCommands, cmd) + } + res.ReadinessEvidence = fmt.Sprintf("All %d authored environment.setup commands exited 0.", len(plan.Commands)) + return res, nil +} + +// runAuthoredReadinessCheck runs ONE representative required quality check in +// the prepared worktree so the daemon can prove the authored setup plan +// actually made the worktree ready (AC2). When no qualifying required check is +// available the readiness check is skipped and the rationale is recorded in +// ReadinessEvidence. On failure the result is downgraded to setup_failed; the +// failed attempt is recorded with Source="readiness_check" so the caller can +// fall back to discovery. +func runAuthoredReadinessCheck(ctx context.Context, opts SetupExecutorPreflightOptions, res *SetupResult) { + if res == nil { + return + } + check, command := selectAuthoredReadinessCheck(opts) + if command == "" { + // No required check qualifies: keep res ready but record the rationale. + if check != "" { + res.ReadinessEvidence += " (readiness check skipped: required check '" + check + "' has no preferred command)" + } else { + res.ReadinessEvidence += " (no required quality check available — readiness inferred from command success)" + } + return + } + argv := []string{shellExecutable(opts.Profiles), "-c", command} + stdoutPath := filepath.Join(opts.RunDir, "setup_readiness_check.stdout.log") + stderrPath := filepath.Join(opts.RunDir, "setup_readiness_check.stderr.log") + out, err := runner.RunCommand(ctx, runner.Command{Argv: argv, WorkDir: opts.WorkDir}, runner.RunOptions{ + Timeout: setupCommandTimeout(opts.Task), + StdoutPath: stdoutPath, + StderrPath: stderrPath, + TailBytes: 8192, + }) + attempt := SetupCommandAttempt{ + Run: command, + Why: "required-check readiness verification (" + check + ")", + Source: SetupSourceReadinessCheck, + ExitCode: out.ExitCode, + StdoutExcerpt: truncateExcerpt(out.Stdout), + StderrExcerpt: truncateExcerpt(out.Stderr), + } + res.Commands = append(res.Commands, attempt) + if err != nil { + res.Status = SetupStatusFailed + res.Error = fmt.Sprintf("authored setup readiness check '%s' failed (exit %d): %v", check, out.ExitCode, err) + res.RepairGuidance = "Inspect setup_readiness_check.stderr.log; fix environment.yaml setup.commands so the required check '" + check + "' passes, or remove the stale setup field and requeue to let Galley learn a working plan." + return + } + res.ReadinessEvidence += fmt.Sprintf(" Readiness verified by required check '%s' (`%s`).", check, command) +} + +// selectAuthoredReadinessCheck picks the first required quality check that has +// a non-empty preferred command. Returns ("", "") when no check qualifies. +func selectAuthoredReadinessCheck(opts SetupExecutorPreflightOptions) (string, string) { + if opts.Profiles.Quality == nil { + return "", "" + } + for _, check := range opts.Profiles.Quality.RequiredChecks { + if !check.Required { + continue + } + if len(check.PreferredCommands) == 0 { + return check.ID, "" + } + cmd := strings.TrimSpace(check.PreferredCommands[0]) + if cmd == "" { + return check.ID, "" + } + return check.ID, cmd + } + return "", "" +} + +// mergeAuthoredAttemptsIntoDiscovery folds the authored attempts (commands and +// readiness check) into the discovery result so reviewers can see both the +// failed authored plan and the successful discovered plan in one +// setup_result.json (AC6). +func mergeAuthoredAttemptsIntoDiscovery(authored, discovered *SetupResult) *SetupResult { + if discovered == nil { + return authored + } + if authored == nil { + return discovered + } + merged := *discovered + combined := make([]SetupCommandAttempt, 0, len(authored.Commands)+len(discovered.Commands)) + combined = append(combined, authored.Commands...) + combined = append(combined, discovered.Commands...) + merged.Commands = combined + // Discovery is the canonical source when both ran. + merged.Source = SetupSourceDiscovered + if merged.ReadinessEvidence == "" && authored.ReadinessEvidence != "" { + merged.ReadinessEvidence = authored.ReadinessEvidence + } + return &merged +} + +// runSetupExecutor dispatches the setup executor (Claude or Codex per +// task.executor.cli) to attempt to make the worktree ready when no authored +// plan exists. +func runSetupExecutor(ctx context.Context, opts SetupExecutorPreflightOptions) (*SetupResult, error) { + signals := opts.RepositorySignals + if signals == nil { + signals = discoverRepositorySignals(opts.WorkDir) + } + payload, perr := marshalSetupExecutorRequest(opts, signals) + if perr != nil { + return setupExecutorFailureResult("plan setup executor request: "+perr.Error(), "", "", 0, "", "", signals), perr + } + commandPlan, provider, perr := buildSetupExecutorCommandPlan(opts, payload) + if perr != nil { + return setupExecutorFailureResult("plan setup executor command: "+perr.Error(), provider, "", 0, "", "", signals), perr + } + if err := writeSetupExecutorCommandPlan(opts.RunDir, commandPlan); err != nil { + return setupExecutorFailureResult("write setup executor command plan: "+err.Error(), provider, "", 0, "", "", signals), err + } + out, runErr := runSetupExecutorCommand(ctx, opts, commandPlan) + executorRun := fmt.Sprintf("", provider) + parsed, parseErr := resolveSetupExecutorResult(opts, out.Stdout) + if parseErr != nil { + message := "setup executor did not return a valid result: " + parseErr.Error() + if runErr != nil { + message = fmt.Sprintf("setup executor exited %d: %s", out.ExitCode, truncateExcerpt(out.Stderr)) + } + failure := setupExecutorFailureResult(message, provider, executorRun, out.ExitCode, out.Stdout, out.Stderr, signals) + return failure, fmt.Errorf("setup executor failed: %v", parseErr) + } + parsed.Provider = provider + if parsed.Status == "" { + parsed.Status = SetupStatusFailed + } + if runErr != nil && parsed.Status == SetupStatusReady { + // Defensive: the runner reported a non-zero exit but the executor + // declared ready. Trust the runner — readiness without exit 0 is not + // trustworthy. + parsed.Status = SetupStatusFailed + if parsed.Error == "" { + parsed.Error = fmt.Sprintf("setup executor process exited non-zero: %v", runErr) + } + } + return parsed, nil +} + +// setupExecutorFailureResult builds a structured setup failure record with +// enough evidence (AC9) for an operator to repair environment.setup. When the +// failure surfaced from an actual setup executor invocation the caller passes +// the executor run identifier, exit code, stdout/stderr, and inspected files. +// When the failure occurred before the executor could run (request marshaling, +// command plan construction) the executor-specific arguments may be zero +// values and only the message is recorded. +func setupExecutorFailureResult(message, provider, executorRun string, exitCode int, stdout, stderr string, inspected []string) *SetupResult { + res := &SetupResult{ + Status: SetupStatusFailed, + Commands: []SetupCommandAttempt{}, + Error: message, + Provider: provider, + Source: SetupSourceDiscovered, + InspectedFiles: append([]string{}, inspected...), + RepairGuidance: "Inspect runs//setup_executor.stderr.log and runs//setup_executor.stdout.jsonl; ensure environment.setup or environment.commands provides a working plan, then requeue.", + } + if executorRun != "" { + res.Commands = append(res.Commands, SetupCommandAttempt{ + Run: executorRun, + Why: "setup executor invocation", + Source: SetupSourceDiscovered, + ExitCode: exitCode, + StdoutExcerpt: truncateExcerpt(stdout), + StderrExcerpt: truncateExcerpt(stderr), + }) + } + return res +} + +// enforceLearnedSetupPlanContract validates that a setup executor that +// returned status=ready also returned a non-empty successful_commands plan +// whenever Galley would persist a learned plan to environment.yaml — that is, +// whenever environment.setup is absent or the discovery path overrode a +// failed authored plan. A ready response with no successful_commands cannot +// be persisted, and silently skipping persistence would either (a) leave +// environment.yaml without a learned plan so the next task re-pays discovery +// cost, or (b) keep a stale authored plan that Galley already proved was +// broken. Either is a silent violation of AC7's "environment.yaml is not +// silently left unchanged" invariant. The returned error is treated as a +// setup_failed by the caller, which downgrades status and writes +// setup_result.json with repair guidance for the operator. +func enforceLearnedSetupPlanContract(res *SetupResult, env *profile.Environment) error { + if res == nil || res.Status != SetupStatusReady { + return nil + } + if len(res.SuccessfulCommands) > 0 { + return nil + } + if env != nil && env.Setup != nil && len(env.Setup.Commands) > 0 { + return fmt.Errorf("setup executor returned status=ready with no successful_commands; cannot confirm whether the authored environment.setup plan ran or persist a replacement plan") + } + return fmt.Errorf("setup executor returned status=ready with no successful_commands; cannot learn a setup plan to persist to environment.yaml") +} + +// applySetupContractViolation downgrades a result that violated the +// learned-plan contract from ready to failed and ensures repair guidance is +// present so the saved setup_result.json carries enough detail for the +// operator to fix environment.commands or author environment.setup before +// requeuing. +func applySetupContractViolation(res *SetupResult, err error) { + if res == nil || err == nil { + return + } + res.Status = SetupStatusFailed + if res.Error == "" { + res.Error = err.Error() + } + if res.RepairGuidance == "" { + res.RepairGuidance = "Return the ordered successful_commands the setup executor ran (or author environment.setup.commands manually), then requeue. environment.yaml was intentionally left unchanged to avoid silently dropping a stale or unknown setup plan." + } +} + +// persistLearnedSetupPlan writes the successful setup plan back to the +// repository environment profile when the resolved profile lacked a setup +// field or the learned plan differs from the existing one. The rewrite is +// atomic and validates the result before returning (AC7). +func persistLearnedSetupPlan(opts SetupExecutorPreflightOptions, env *profile.Environment, res *SetupResult) (*SetupEnvironmentUpdate, error) { + if res == nil || res.Status != SetupStatusReady { + return nil, nil + } + if len(res.SuccessfulCommands) == 0 { + return nil, nil + } + if opts.EnvironmentProfilePath == "" { + return nil, nil + } + plan := profile.SetupPlan{Commands: append([]profile.SetupCommand{}, res.SuccessfulCommands...)} + if env.Setup != nil && setupPlansEqual(*env.Setup, plan) { + return nil, nil + } + prior, err := profile.UpdateEnvironmentSetup(opts.EnvironmentProfilePath, plan) + if err != nil { + return nil, err + } + reason := "no setup field; persisted learned plan" + if prior != nil { + reason = "learned plan differs from prior setup; updated" + } + return &SetupEnvironmentUpdate{ + ProfilePath: opts.EnvironmentProfilePath, + Changed: true, + Before: prior, + After: plan, + Reason: reason, + UpdatedAt: time.Now().UTC().Format(time.RFC3339Nano), + }, nil +} + +func setupPlansEqual(a, b profile.SetupPlan) bool { + if len(a.Commands) != len(b.Commands) { + return false + } + for i := range a.Commands { + if strings.TrimSpace(a.Commands[i].Run) != strings.TrimSpace(b.Commands[i].Run) { + return false + } + } + return true +} + +func recordSetupProfileUpdateFailure(runDir string, err error) { + payload := map[string]any{ + "changed": false, + "error": err.Error(), + } + _ = writeJSON(filepath.Join(runDir, "environment_update.json"), payload) +} + +// WriteSetupResult persists the source-of-truth setup_result.json (AC8). +func WriteSetupResult(runDir string, res *SetupResult) error { + if runDir == "" { + return fmt.Errorf("run dir is required for setup result") + } + if res == nil { + return nil + } + if err := os.MkdirAll(runDir, 0o700); err != nil { + return fmt.Errorf("create setup run dir: %w", err) + } + return writeJSON(filepath.Join(runDir, "setup_result.json"), res) +} + +// LoadSetupEnvironmentUpdate reads the persisted environment_update.json. +// Returns (nil, nil) when the file does not exist so callers can probe +// unconditionally. +func LoadSetupEnvironmentUpdate(runDir string) (*SetupEnvironmentUpdate, error) { + path := filepath.Join(runDir, "environment_update.json") + data, err := os.ReadFile(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, err + } + var update SetupEnvironmentUpdate + if err := json.Unmarshal(data, &update); err != nil { + return nil, err + } + return &update, nil +} + +// loadSetupRunEvidence reads the persisted setup_result.json and +// environment_update.json from runDir. Errors are logged to stderr and the +// helper returns nil, nil so a stale or missing file never blocks the loop. +func loadSetupRunEvidence(runDir, runID string) (*SetupResult, *SetupEnvironmentUpdate) { + res, err := LoadSetupResult(runDir) + if err != nil { + fmt.Fprintf(os.Stderr, "galley: could not load setup result for run %s: %v\n", runID, err) + } + update, uerr := LoadSetupEnvironmentUpdate(runDir) + if uerr != nil { + fmt.Fprintf(os.Stderr, "galley: could not load setup environment update for run %s: %v\n", runID, uerr) + } + return res, update +} + +// appendSetupReadinessObligations adds the setup readiness facts (and any +// learned-plan update) to the work order so the implementation executor sees +// the same readiness evidence the supervisor will review (AC8). +func appendSetupReadinessObligations(prompt string, res *SetupResult, update *SetupEnvironmentUpdate) string { + if res == nil { + return prompt + } + var b strings.Builder + b.WriteString("\n## Setup Readiness (Runtime)\n\n") + fmt.Fprintf(&b, "Galley setup phase status: %s (commands attempted: %d).\n", res.Status, len(res.Commands)) + if res.Source != "" { + fmt.Fprintf(&b, "Setup source: %s.\n", res.Source) + } + if res.Provider != "" { + fmt.Fprintf(&b, "Setup provider: %s.\n", res.Provider) + } + if res.ReadinessEvidence != "" { + fmt.Fprintf(&b, "Readiness evidence: %s\n", res.ReadinessEvidence) + } + if len(res.SuccessfulCommands) > 0 { + b.WriteString("Successful setup plan (this is the plan the setup phase persisted):\n") + for i, cmd := range res.SuccessfulCommands { + fmt.Fprintf(&b, " %d. `%s`", i+1, cmd.Run) + if cmd.Why != "" { + fmt.Fprintf(&b, " — %s", cmd.Why) + } + b.WriteString("\n") + } + } + if update != nil && update.Changed { + fmt.Fprintf(&b, "Galley updated environment.yaml setup field at %s (%s).\n", update.ProfilePath, update.Reason) + } else if res.Status == SetupStatusReady { + b.WriteString("Setup readiness was confirmed without changing environment.yaml.\n") + } + return prompt + b.String() +} + +// LoadSetupResult reads the persisted setup_result.json. Returns (nil, nil) +// when the file does not exist so callers can probe unconditionally. +func LoadSetupResult(runDir string) (*SetupResult, error) { + path := filepath.Join(runDir, "setup_result.json") + data, err := os.ReadFile(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, nil + } + return nil, err + } + var res SetupResult + if err := json.Unmarshal(data, &res); err != nil { + return nil, err + } + return &res, nil +} + +// WriteSetupEnvironmentUpdate persists the profile-rewrite audit record. +func WriteSetupEnvironmentUpdate(runDir string, update *SetupEnvironmentUpdate) error { + if update == nil { + return nil + } + if err := os.MkdirAll(runDir, 0o700); err != nil { + return fmt.Errorf("create setup run dir: %w", err) + } + return writeJSON(filepath.Join(runDir, "environment_update.json"), update) +} + +func shellExecutable(_ profile.Bundle) string { + if path, err := exec.LookPath("bash"); err == nil { + return path + } + if path, err := exec.LookPath("sh"); err == nil { + return path + } + return "/bin/sh" +} + +func setupCommandTimeout(t task.Task) time.Duration { + if t.ExecutionPolicy.TimeoutMS > 0 { + return time.Duration(t.ExecutionPolicy.TimeoutMS) * time.Millisecond + } + return 30 * time.Minute +} + +func truncateExcerpt(s string) string { + const max = 400 + s = strings.TrimSpace(s) + if len(s) <= max { + return s + } + return "…" + s[len(s)-max:] +} + +// discoverRepositorySignals returns a small set of repository setup signal +// paths (manifests, lockfiles, setup docs) the daemon surfaces to the setup +// executor when no authored plan exists. The list is intentionally bounded so +// the work order payload stays small. +func discoverRepositorySignals(workDir string) []string { + candidates := []string{ + "package.json", "package-lock.json", "pnpm-lock.yaml", "yarn.lock", + "go.mod", "go.sum", + "pyproject.toml", "poetry.lock", "requirements.txt", "Pipfile.lock", + "Cargo.toml", "Cargo.lock", + "Gemfile", "Gemfile.lock", + "build.gradle", "build.gradle.kts", "pom.xml", + "Makefile", "Taskfile.yml", + ".tool-versions", "mise.toml", ".nvmrc", + "README.md", "CONTRIBUTING.md", "docs/setup.md", + } + out := make([]string, 0, 8) + for _, name := range candidates { + if _, err := os.Stat(filepath.Join(workDir, name)); err == nil { + out = append(out, name) + } + } + if scripts, err := os.ReadDir(filepath.Join(workDir, "scripts")); err == nil { + for _, entry := range scripts { + if !entry.IsDir() { + out = append(out, filepath.Join("scripts", entry.Name())) + } + } + } + return out +} + +func marshalSetupExecutorRequest(opts SetupExecutorPreflightOptions, signals []string) ([]byte, error) { + request := map[string]any{ + "task": opts.Task, + "environment": opts.Profiles.Environment, + "quality": opts.Profiles.Quality, + "repository_signals": signals, + "worktree": opts.WorkDir, + } + return json.MarshalIndent(request, "", " ") +} + +func buildSetupExecutorCommandPlan(opts SetupExecutorPreflightOptions, payload []byte) (runner.Command, string, error) { + provider := setupExecutorProvider(opts.Task) + switch provider { + case "codex": + cmd, err := buildCodexSetupExecutorCommandPlan(opts, payload) + return cmd, provider, err + default: + cmd, err := buildClaudeSetupExecutorCommandPlan(opts, payload) + return cmd, "claude", err + } +} + +// setupExecutorProvider mirrors creatorProvider: the setup executor follows +// the task implementation executor backend (AC4) so the same provider runs +// setup, optional skeleton creation, and implementation. +func setupExecutorProvider(t task.Task) string { + switch t.Executor.CLI { + case "codex": + return "codex" + default: + return "claude" + } +} + +func buildClaudeSetupExecutorCommandPlan(opts SetupExecutorPreflightOptions, payload []byte) (runner.Command, error) { + bin := opts.ClaudeBin + if bin == "" { + bin = "claude" + } + guardDir, err := claudeguard.Ensure(filepath.Join(opts.RunDir, "claude-guard-plugin")) + if err != nil { + return runner.Command{}, fmt.Errorf("prepare setup guard plugin: %w", err) + } + guardDir, err = filepath.Abs(guardDir) + if err != nil { + return runner.Command{}, fmt.Errorf("resolve setup guard plugin: %w", err) + } + commandPlan, err := runner.ClaudeCommandPlan(runner.ClaudeOptions{ + Bin: bin, + Model: opts.Task.Executor.Model, + Effort: opts.Task.Executor.Effort, + WorkDir: opts.WorkDir, + Prompt: string(payload), + SystemPrompt: prompts.SetupExecutorClaude(), + JSONSchema: schemas.SetupResult, + PromptMode: "replace", + PermissionMode: "bypassPermissions", + MaxBudgetUSD: opts.Task.Executor.MaxBudgetUSDValue(), + PluginDirs: []string{guardDir}, + AttemptDir: opts.RunDir, + }) + if err != nil { + return runner.Command{}, fmt.Errorf("plan setup executor: %w", err) + } + commandPlan.Env = runner.RestrictedEnv("GALLEY_CLAUDE_GUARD_MODE=setup_executor") + return commandPlan, nil +} + +func buildCodexSetupExecutorCommandPlan(opts SetupExecutorPreflightOptions, payload []byte) (runner.Command, error) { + codexOpts := runner.CodexFromTask(opts.Task) + codexOpts.Bin = opts.CodexBin + if codexOpts.Bin == "" { + codexOpts.Bin = "codex" + } + codexOpts.WorkDir = opts.WorkDir + codexOpts.Prompt = string(payload) + codexOpts.SystemPrompt = prompts.SetupExecutorCodex() + codexOpts.JSONSchema = schemas.SetupResult + codexOpts.AttemptDir = opts.RunDir + + commandPlan, err := runner.CodexCommandPlan(codexOpts) + if err != nil { + return runner.Command{}, fmt.Errorf("plan setup executor: %w", err) + } + return commandPlan, nil +} + +func writeSetupExecutorCommandPlan(runDir string, commandPlan runner.Command) error { + planPath := filepath.Join(runDir, "setup_executor_command_plan.json") + auditPlan := commandPlan + auditPlan.Env = nil + return writeJSON(planPath, auditPlan) +} + +func runSetupExecutorCommand(ctx context.Context, opts SetupExecutorPreflightOptions, commandPlan runner.Command) (runner.RunResult, error) { + stdoutPath := filepath.Join(opts.RunDir, "setup_executor.stdout.jsonl") + stderrPath := filepath.Join(opts.RunDir, "setup_executor.stderr.log") + return runner.RunCommand(ctx, commandPlan, runner.RunOptions{ + Timeout: setupCommandTimeout(opts.Task), + StdoutPath: stdoutPath, + StderrPath: stderrPath, + TailBytes: 16384, + }) +} + +// resolveSetupExecutorResult parses the setup executor's structured result +// from the provider's canonical output surface. Codex emits the final JSON +// message through `--output-last-message`; Claude streams the JSON through +// stdout JSONL. +func resolveSetupExecutorResult(opts SetupExecutorPreflightOptions, stdoutTail string) (*SetupResult, error) { + if setupExecutorProvider(opts.Task) == "codex" { + lastMessagePath := filepath.Join(opts.RunDir, runner.CodexLastMessageFilename) + if data, err := os.ReadFile(lastMessagePath); err == nil { + if res, ok := parseSetupResultText(string(data)); ok { + return res, nil + } + } + } + if res, ok := parseSetupResultText(stdoutTail); ok { + return res, nil + } + // Best-effort fallback: scan the stream line by line for an embedded JSON object. + for _, line := range strings.Split(strings.TrimSpace(stdoutTail), "\n") { + line = strings.TrimSpace(line) + if line == "" { + continue + } + var event map[string]json.RawMessage + if err := json.Unmarshal([]byte(line), &event); err == nil { + for _, key := range []string{"result", "response", "message"} { + raw, ok := event[key] + if !ok { + continue + } + var text string + if err := json.Unmarshal(raw, &text); err == nil { + if res, ok := parseSetupResultText(text); ok { + return res, nil + } + } + if res, ok := parseSetupResultRaw(raw); ok { + return res, nil + } + } + } + } + return nil, fmt.Errorf("setup executor result JSON not found") +} + +func parseSetupResultText(text string) (*SetupResult, bool) { + start := strings.Index(text, "{") + end := strings.LastIndex(text, "}") + if start < 0 || end < start { + return nil, false + } + return parseSetupResultRaw([]byte(text[start : end+1])) +} + +func parseSetupResultRaw(data []byte) (*SetupResult, bool) { + var res SetupResult + if err := json.Unmarshal(data, &res); err != nil { + return nil, false + } + if res.Status == "" || res.Commands == nil { + return nil, false + } + return &res, true +} diff --git a/internal/daemon/setup_executor_preflight_test.go b/internal/daemon/setup_executor_preflight_test.go new file mode 100644 index 0000000..ca2393c --- /dev/null +++ b/internal/daemon/setup_executor_preflight_test.go @@ -0,0 +1,917 @@ +package daemon + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/shinpr/galley/internal/profile" + "github.com/shinpr/galley/internal/runner" + "github.com/shinpr/galley/internal/task" +) + +// setupTask returns a minimal task usable by setup preflight tests. Callers +// override fields they care about. +func setupTask() task.Task { + return task.Task{ + ID: "setup-test", + AcceptanceCriteria: []task.AcceptanceCriterion{{ID: "AC1", Text: "AC1", Verification: "see AC1", Status: "pending"}}, + Scope: task.Scope{AllowedPaths: []string{"."}}, + Executor: task.Executor{CLI: "claude"}, + } +} + +func writeSetupEnvironmentProfile(t *testing.T, dir string, body string) string { + t.Helper() + path := filepath.Join(dir, "environment-local.yaml") + if err := os.WriteFile(path, []byte(body), 0o600); err != nil { + t.Fatal(err) + } + return path +} + +// TestSetupPreflightRunsBeforeAcceptanceSkeletonAndExecutor proves AC2: the +// setup preflight runs after worktree/input-file preparation and before any +// acceptance skeleton work or implementation executor attempt. The setup +// executor runner stub records its observed order vs the daemon-level +// acceptance_skeleton_creator subprocess. +func TestSetupPreflightSequencesBeforeSkeletonAndExecutor(t *testing.T) { + // Authored setup plan that writes a sentinel file BEFORE skeleton creation. + // AcceptanceSkeletonPreflight is invoked by processClaimedTask AFTER the + // setup preflight, so if setup ran first the sentinel exists by the time + // AcceptanceSkeletonPreflight's stub creator writes the skeleton file. + root := filepath.Join(t.TempDir(), ".agent-workflow") + repo := initDaemonGitRepo(t) + promptPath, schemaPath := writeDaemonPromptFiles(t) + // Fake Claude that, when invoked as the implementation executor, fails + // loudly if the sentinel from setup is missing. The skeleton creator path + // is detected by the manifest substring in the system prompt; the setup + // preflight stub does not spawn Claude. + claudeBin := writeFakeClaude(t, `creator=0 +for arg in "$@"; do + case "$arg" in + *"Galley Acceptance Skeleton Manifest"*) creator=1 ;; + esac +done +if [ "$creator" = "1" ]; then + if [ ! -f setup.sentinel ]; then + echo "skeleton ran before setup" 1>&2 + exit 99 + fi + mkdir -p internal/foo + printf 'package foo_test\n' > internal/foo/foo_test.go + printf '%s\n' '{"type":"result","result":"{\"outputs\":[{\"ac_id\":\"AC1\",\"path\":\"internal/foo/foo_test.go\",\"kind\":\"go-test\",\"purpose\":\"verify AC1\",\"satisfies\":\"AC1 observable behavior\",\"integration_point\":\"executor completes this skeleton before acceptance\",\"implementation_required\":true}],\"no_skeletons\":[]}"}' + exit 0 +fi +if [ ! -f setup.sentinel ]; then + echo "executor ran before setup" 1>&2 + exit 98 +fi +echo change > daemon-output.txt +echo '{"status":"completed","summary":"done","files_modified":["daemon-output.txt"],"acceptance_criteria":[{"id":"AC1","status":"satisfied","evidence":["diff"],"notes":"done"}],"verification":[],"decisions":[],"risks":[]}' +`) + + // Authored environment profile with a setup plan whose first command + // writes the sentinel inside the worktree. When the daemon-level setup + // preflight runs the authored plan it creates setup.sentinel before + // AcceptanceSkeletonPreflight is invoked, which is exactly the ordering + // AC2 requires. Because the authored plan succeeds, persistLearnedSetupPlan + // is never reached and the verification excerpt records + // "environment.yaml=unchanged". + envDir := t.TempDir() + envPath := writeSetupEnvironmentProfile(t, envDir, `id: "sequencing" +cwd: `+workdirQuote(repo)+` +commands: + test_unit: "true" +constraints: + network: "approval_required" + secrets_policy: "never_read_env_files" + destructive_commands: "deny" +setup: + commands: + - run: "touch setup.sentinel" + why: "sentinel for setup-before-skeleton ordering proof" +`) + // Defensive: if anything changes the path so the daemon falls back to + // discovery, fail loudly rather than silently masking AC2. + withSetupExecutorRunner(t, func(_ context.Context, _ SetupExecutorPreflightOptions) (*SetupResult, error) { + t.Fatalf("setup executor runner should not be invoked when authored environment.setup plan succeeds") + return nil, nil + }) + + taskPath := filepath.Join(root, "tasks", "queued", "task.yaml") + if err := os.MkdirAll(filepath.Dir(taskPath), 0o755); err != nil { + t.Fatal(err) + } + writeDaemonTask(t, taskPath, repo) + loaded, err := task.Load(taskPath) + if err != nil { + t.Fatal(err) + } + loaded.Preflight = &task.Preflight{AcceptanceSkeleton: &task.AcceptanceSkeletonConfig{Enabled: true}} + if err := task.Save(taskPath, loaded); err != nil { + t.Fatal(err) + } + + if err := Run(context.Background(), Options{ + Root: root, + SystemPromptFile: promptPath, + JSONSchemaFile: schemaPath, + EnvironmentProfileFile: envPath, + Once: true, + MaxConcurrentTasks: 1, + Supervisor: "claude", + ClaudeBin: claudeBin, + }); err != nil { + t.Fatal(err) + } + + donePath := filepath.Join(root, "tasks", "done", "task.yaml") + if _, err := os.Stat(donePath); err != nil { + t.Fatalf("task did not complete; setup likely did not run before skeleton/executor: %v", err) + } + doneTask, err := task.Load(donePath) + if err != nil { + t.Fatal(err) + } + // AC8: setup readiness must be recorded in task verification history. + foundSetup := false + for _, vc := range doneTask.Verification.Commands { + if strings.HasPrefix(vc.Cmd, " entry: %#v", doneTask.Verification.Commands) + } +} + +// TestSetupPreflightAbsentSetupInvokesExecutorWithEnvironmentAndSignals proves +// AC3: when environment.setup is absent the daemon invokes the setup executor +// with the full environment.commands map and repository setup signals as +// context. +func TestSetupPreflightAbsentSetupInvokesExecutorWithEnvironmentAndSignals(t *testing.T) { + work := t.TempDir() + runDir := t.TempDir() + if err := os.WriteFile(filepath.Join(work, "package.json"), []byte("{}\n"), 0o600); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(work, "go.mod"), []byte("module example.com/foo\n"), 0o600); err != nil { + t.Fatal(err) + } + env := &profile.Environment{ + ID: "absent-setup", + CWD: work, + Commands: map[string]string{"test_unit": "go test ./...", "install": "npm ci"}, + Constraints: profile.Constraints{ + Network: "approval_required", + SecretsPolicy: "never_read_env_files", + DestructiveCommands: "deny", + }, + } + type captured struct { + Commands map[string]string + Signals []string + WorkDir string + } + cap := &captured{} + withSetupExecutorRunner(t, func(_ context.Context, opts SetupExecutorPreflightOptions) (*SetupResult, error) { + cap.Commands = map[string]string{} + if opts.Profiles.Environment != nil { + for k, v := range opts.Profiles.Environment.Commands { + cap.Commands[k] = v + } + } + signals := opts.RepositorySignals + if signals == nil { + signals = discoverRepositorySignals(opts.WorkDir) + } + cap.Signals = append([]string{}, signals...) + cap.WorkDir = opts.WorkDir + return &SetupResult{ + Status: SetupStatusReady, + Commands: []SetupCommandAttempt{{Run: "go mod download", Source: SetupSourceDiscovered, ExitCode: 0}}, + SuccessfulCommands: []profile.SetupCommand{{Run: "go mod download", Why: "fetch modules"}}, + ReadinessEvidence: "stub readiness", + Source: SetupSourceDiscovered, + Provider: "claude", + }, nil + }) + res, _, err := SetupExecutorPreflight(context.Background(), SetupExecutorPreflightOptions{ + Task: setupTask(), + WorkDir: work, + RunDir: runDir, + Profiles: profile.Bundle{Environment: env}, + }) + if err != nil { + t.Fatalf("setup preflight: %v", err) + } + if res == nil || res.Status != SetupStatusReady { + t.Fatalf("result: %+v", res) + } + if cap.Commands["test_unit"] != "go test ./..." || cap.Commands["install"] != "npm ci" { + t.Fatalf("environment.commands not passed to setup executor: %+v", cap.Commands) + } + foundPkg := false + foundGoMod := false + for _, s := range cap.Signals { + if s == "package.json" { + foundPkg = true + } + if s == "go.mod" { + foundGoMod = true + } + } + if !foundPkg || !foundGoMod { + t.Fatalf("repository signals missing manifests: %+v", cap.Signals) + } + // setup_result.json is written for evidence routing (AC8). + if _, err := os.Stat(filepath.Join(runDir, "setup_result.json")); err != nil { + t.Fatalf("setup_result.json missing: %v", err) + } +} + +// TestSetupExecutorCommandPlanClaudeAndCodex proves part of AC4: building the +// setup executor command plan for both Claude and Codex returns provider-shaped +// argv that embeds the bin path and references the canonical capture path each +// provider uses. +func TestSetupExecutorCommandPlanClaudeAndCodex(t *testing.T) { + work := t.TempDir() + runDir := t.TempDir() + payload := []byte(`{"environment":{"id":"x"}}`) + + // Claude: command_plan must use ClaudeBin and prompt mode replace. + claudeOpts := SetupExecutorPreflightOptions{ + Task: setupTask(), + WorkDir: work, + RunDir: runDir, + Profiles: profile.Bundle{}, + ClaudeBin: "/path/to/claude", + } + claudePlan, provider, err := buildSetupExecutorCommandPlan(claudeOpts, payload) + if err != nil { + t.Fatalf("claude plan: %v", err) + } + if provider != "claude" { + t.Fatalf("provider got %q", provider) + } + if len(claudePlan.Argv) == 0 || claudePlan.Argv[0] != "/path/to/claude" { + t.Fatalf("claude argv[0] got %v", claudePlan.Argv) + } + + // Codex: command_plan must use CodexBin and request --output-last-message + // pointed at the attempt-scoped capture path used by parsing. + codexTask := setupTask() + codexTask.Executor.CLI = "codex" + codexOpts := SetupExecutorPreflightOptions{ + Task: codexTask, + WorkDir: work, + RunDir: runDir, + Profiles: profile.Bundle{}, + CodexBin: "/path/to/codex", + } + codexPlan, provider, err := buildSetupExecutorCommandPlan(codexOpts, payload) + if err != nil { + t.Fatalf("codex plan: %v", err) + } + if provider != "codex" { + t.Fatalf("provider got %q", provider) + } + if len(codexPlan.Argv) == 0 || codexPlan.Argv[0] != "/path/to/codex" { + t.Fatalf("codex argv[0] got %v", codexPlan.Argv) + } + joined := strings.Join(codexPlan.Argv, " ") + if !strings.Contains(joined, "--output-last-message") { + t.Fatalf("codex argv missing --output-last-message: %v", codexPlan.Argv) + } + if !strings.Contains(joined, runner.CodexLastMessageFilename) { + t.Fatalf("codex argv missing capture filename: %v", codexPlan.Argv) + } +} + +// TestSetupExecutorResolveResultClaudeAndCodex proves the second part of AC4: +// the result-parsing path accepts the same SetupResult JSON shape from both +// providers — Claude via stdout tail, Codex via the attempt-scoped +// --output-last-message file. +func TestSetupExecutorResolveResultClaudeAndCodex(t *testing.T) { + runDir := t.TempDir() + claudeJSON := `{"status":"ready","commands":[{"run":"go mod download","source":"discovered","exit_code":0}],"successful_commands":[{"run":"go mod download","why":"fetch modules"}],"readiness_evidence":"ok"}` + // Claude path: parses arbitrary tail text that contains the JSON object. + claudeOpts := SetupExecutorPreflightOptions{Task: setupTask(), RunDir: runDir} + claude, err := resolveSetupExecutorResult(claudeOpts, "noise prefix\n"+claudeJSON+"\nnoise suffix") + if err != nil { + t.Fatalf("claude resolve: %v", err) + } + if claude.Status != SetupStatusReady || len(claude.Commands) != 1 || claude.Commands[0].Source != SetupSourceDiscovered { + t.Fatalf("claude parsed result: %+v", claude) + } + + // Codex path: read from runner.CodexLastMessageFilename written in runDir. + codexJSON := `Some preamble text\n` + claudeJSON + if err := os.WriteFile(filepath.Join(runDir, runner.CodexLastMessageFilename), []byte(codexJSON), 0o600); err != nil { + t.Fatal(err) + } + codexTask := setupTask() + codexTask.Executor.CLI = "codex" + codexOpts := SetupExecutorPreflightOptions{Task: codexTask, RunDir: runDir} + codex, err := resolveSetupExecutorResult(codexOpts, "") + if err != nil { + t.Fatalf("codex resolve: %v", err) + } + if codex.Status != SetupStatusReady || codex.SuccessfulCommands[0].Run != "go mod download" { + t.Fatalf("codex parsed result: %+v", codex) + } +} + +// TestSetupPreflightStaleAuthoredPlanFallsBackToDiscovery proves AC6: a stale +// authored setup plan whose first command fails causes the daemon to fall back +// to the setup executor's discovery, and both the failed authored command and +// the successful discovered command are recorded in setup_result.json with +// their distinct sources. +func TestSetupPreflightStaleAuthoredPlanFallsBackToDiscovery(t *testing.T) { + work := t.TempDir() + runDir := t.TempDir() + dir := t.TempDir() + envPath := writeSetupEnvironmentProfile(t, dir, `id: "stale" +cwd: `+workdirQuote(work)+` +commands: + test_unit: "true" +constraints: + network: "approval_required" + secrets_policy: "never_read_env_files" + destructive_commands: "deny" +setup: + commands: + - run: "false" + why: "stale authored command that no longer works" +`) + env, err := profile.LoadEnvironment(envPath) + if err != nil { + t.Fatal(err) + } + withSetupExecutorRunner(t, func(_ context.Context, opts SetupExecutorPreflightOptions) (*SetupResult, error) { + return &SetupResult{ + Status: SetupStatusReady, + Commands: []SetupCommandAttempt{{Run: "echo discovered", Source: SetupSourceDiscovered, ExitCode: 0}}, + SuccessfulCommands: []profile.SetupCommand{{Run: "echo discovered", Why: "actual working command"}}, + ReadinessEvidence: "discovered plan passed", + Source: SetupSourceDiscovered, + Provider: "claude", + }, nil + }) + res, update, err := SetupExecutorPreflight(context.Background(), SetupExecutorPreflightOptions{ + Task: setupTask(), + WorkDir: work, + RunDir: runDir, + Profiles: profile.Bundle{Environment: &env}, + EnvironmentProfilePath: envPath, + }) + if err != nil { + t.Fatalf("setup preflight: %v", err) + } + if res == nil || res.Status != SetupStatusReady { + t.Fatalf("result: %+v", res) + } + // AC6: both the failed authored command AND the successful discovered + // command must be recorded. + foundFailedAuthored := false + foundSuccessfulDiscovered := false + for _, c := range res.Commands { + if c.Source == SetupSourceEnvironmentSetup && c.Run == "false" && c.ExitCode != 0 { + foundFailedAuthored = true + } + if c.Source == SetupSourceDiscovered && c.Run == "echo discovered" && c.ExitCode == 0 { + foundSuccessfulDiscovered = true + } + } + if !foundFailedAuthored { + t.Fatalf("failed authored command not recorded: %+v", res.Commands) + } + if !foundSuccessfulDiscovered { + t.Fatalf("successful discovered command not recorded: %+v", res.Commands) + } + _ = update + // AC7: persistence-only check is in another test, but make sure update + // metadata reflects that the prior plan differed. + if update == nil || !update.Changed || update.Before == nil || len(update.Before.Commands) == 0 || update.Before.Commands[0].Run != "false" { + t.Fatalf("environment update should record prior stale plan: %+v", update) + } +} + +// TestSetupPreflightAtomicProfileRewriteAndSecondRunReuse proves AC7: a +// successful learned plan atomically rewrites environment.yaml setup while +// preserving unrelated content, and a second daemon run with the persisted +// setup uses the authored plan without invoking discovery. +func TestSetupPreflightAtomicProfileRewriteAndSecondRunReuse(t *testing.T) { + work := t.TempDir() + dir := t.TempDir() + // Profile without setup; includes unrelated content that must survive the + // atomic rewrite. pr.base is intentionally written as an unquoted scalar + // so the round-trip check below proves the YAML node style is preserved + // (a previous regression re-quoted it on rewrite). + envBody := `id: "absent-setup" +cwd: ` + workdirQuote(work) + ` +commands: + test_unit: "go test ./..." +constraints: + network: "approval_required" + secrets_policy: "never_read_env_files" + destructive_commands: "deny" +pr: + enabled: true + base: main +` + envPath := writeSetupEnvironmentProfile(t, dir, envBody) + env1, err := profile.LoadEnvironment(envPath) + if err != nil { + t.Fatal(err) + } + runDir1 := t.TempDir() + discoveryCalls := 0 + withSetupExecutorRunner(t, func(_ context.Context, _ SetupExecutorPreflightOptions) (*SetupResult, error) { + discoveryCalls++ + return &SetupResult{ + Status: SetupStatusReady, + Commands: []SetupCommandAttempt{{Run: "echo learned", Source: SetupSourceDiscovered, ExitCode: 0}}, + SuccessfulCommands: []profile.SetupCommand{{Run: "echo learned", Why: "learned setup"}}, + ReadinessEvidence: "discovery passed", + Source: SetupSourceDiscovered, + Provider: "claude", + }, nil + }) + res1, update1, err := SetupExecutorPreflight(context.Background(), SetupExecutorPreflightOptions{ + Task: setupTask(), + WorkDir: work, + RunDir: runDir1, + Profiles: profile.Bundle{Environment: &env1}, + EnvironmentProfilePath: envPath, + }) + if err != nil { + t.Fatalf("first run: %v", err) + } + if discoveryCalls != 1 { + t.Fatalf("discoveryCalls first run got %d, want 1", discoveryCalls) + } + if res1 == nil || res1.Status != SetupStatusReady { + t.Fatalf("first run result: %+v", res1) + } + if update1 == nil || !update1.Changed { + t.Fatalf("first run should have persisted plan: %+v", update1) + } + + // Verify the file was atomically rewritten with the new setup AND unrelated + // content is preserved. + rewritten, err := os.ReadFile(envPath) + if err != nil { + t.Fatal(err) + } + body := string(rewritten) + if !strings.Contains(body, "echo learned") { + t.Fatalf("setup not persisted: %s", body) + } + if !strings.Contains(body, "test_unit") || !strings.Contains(body, "approval_required") || !strings.Contains(body, "base: main") { + t.Fatalf("unrelated content lost in rewrite: %s", body) + } + // Re-validate the rewritten file end-to-end. + env2, err := profile.LoadEnvironment(envPath) + if err != nil { + t.Fatalf("reload rewritten env: %v", err) + } + if vr := profile.ValidateEnvironment(env2); !vr.Valid() { + t.Fatalf("rewritten env failed validation: %v", vr.Errors) + } + if env2.Setup == nil || len(env2.Setup.Commands) != 1 || env2.Setup.Commands[0].Run != "echo learned" { + t.Fatalf("rewritten env setup wrong: %+v", env2.Setup) + } + + // AC7 second-run reuse: re-running preflight with the persisted plan must + // NOT invoke discovery and must not record any new environment.yaml change. + runDir2 := t.TempDir() + res2, update2, err := SetupExecutorPreflight(context.Background(), SetupExecutorPreflightOptions{ + Task: setupTask(), + WorkDir: work, + RunDir: runDir2, + Profiles: profile.Bundle{Environment: &env2}, + EnvironmentProfilePath: envPath, + }) + if err != nil { + t.Fatalf("second run: %v", err) + } + if discoveryCalls != 1 { + t.Fatalf("discovery was re-invoked on second run; discoveryCalls=%d", discoveryCalls) + } + if res2 == nil || res2.Status != SetupStatusReady { + t.Fatalf("second run result: %+v", res2) + } + if update2 != nil { + t.Fatalf("second run should not have rewritten environment.yaml: %+v", update2) + } + // All recorded commands should be from the authored (now persisted) source. + for _, c := range res2.Commands { + if c.Source == SetupSourceDiscovered { + t.Fatalf("second run recorded discovered command but should reuse persisted plan: %+v", res2.Commands) + } + } +} + +// TestSetupPreflightSetupFailedClassifiesAndWritesEvidence proves AC9: when +// setup cannot make the worktree ready, the preflight returns an error and +// writes setup_result.json with status=failed, the executor's failure +// excerpts, and repair guidance. +func TestSetupPreflightSetupFailedClassifiesAndWritesEvidence(t *testing.T) { + work := t.TempDir() + runDir := t.TempDir() + env := &profile.Environment{ + ID: "fail", + CWD: work, + Constraints: profile.Constraints{ + Network: "approval_required", + SecretsPolicy: "never_read_env_files", + DestructiveCommands: "deny", + }, + } + withSetupExecutorRunner(t, func(_ context.Context, _ SetupExecutorPreflightOptions) (*SetupResult, error) { + return &SetupResult{ + Status: SetupStatusFailed, + Commands: []SetupCommandAttempt{{Run: "npm ci", Source: SetupSourceDiscovered, ExitCode: 1, StderrExcerpt: "ENOENT"}}, + Error: "setup executor could not install dependencies", + RepairGuidance: "set environment.commands.install to a working command, or author environment.setup", + Source: SetupSourceDiscovered, + Provider: "claude", + }, nil + }) + _, _, err := SetupExecutorPreflight(context.Background(), SetupExecutorPreflightOptions{ + Task: setupTask(), + WorkDir: work, + RunDir: runDir, + Profiles: profile.Bundle{Environment: env}, + }) + if err == nil { + t.Fatalf("expected setup preflight error") + } + // setup_result.json must contain the failure facts. + data, readErr := os.ReadFile(filepath.Join(runDir, "setup_result.json")) + if readErr != nil { + t.Fatalf("setup_result.json missing: %v", readErr) + } + var saved SetupResult + if err := json.Unmarshal(data, &saved); err != nil { + t.Fatalf("decode setup_result.json: %v", err) + } + if saved.Status != SetupStatusFailed { + t.Fatalf("saved status got %q, want failed", saved.Status) + } + if saved.Error == "" || saved.RepairGuidance == "" { + t.Fatalf("saved failure missing error or repair guidance: %+v", saved) + } + if len(saved.Commands) == 0 || saved.Commands[0].Run != "npm ci" { + t.Fatalf("attempted commands not recorded: %+v", saved.Commands) + } + + // Phase/kind classification: applying the failure path through the same + // daemon helper used by processClaimedTask must yield phase=setup and + // kind=setup_failed in the task latest error. + tk := setupTask() + appendFailureAttempt(&tk, SetupPhase, SetupFailedKind, err, runDir) + if len(tk.Attempts) == 0 || tk.Attempts[len(tk.Attempts)-1].Error == nil { + t.Fatalf("no attempt error appended") + } + last := tk.Attempts[len(tk.Attempts)-1].Error + if last.Phase != SetupPhase || last.Kind != SetupFailedKind { + t.Fatalf("phase/kind got phase=%q kind=%q want phase=%q kind=%q", last.Phase, last.Kind, SetupPhase, SetupFailedKind) + } +} + +// TestSetupPreflightExcludesAcceptanceSkeletonReadiness proves AC10: setup +// readiness checks do not include acceptance-skeleton obligations. The +// authored-plan readiness check picks a required quality check; with no +// skeleton-related required checks defined, the readiness check should succeed +// without depending on any task-specific skeleton file. +func TestSetupPreflightExcludesAcceptanceSkeletonReadiness(t *testing.T) { + work := t.TempDir() + runDir := t.TempDir() + env := &profile.Environment{ + ID: "ac10", + CWD: work, + Setup: &profile.SetupPlan{Commands: []profile.SetupCommand{ + {Run: "true", Why: "noop"}, + }}, + Constraints: profile.Constraints{ + Network: "approval_required", + SecretsPolicy: "never_read_env_files", + DestructiveCommands: "deny", + }, + } + // Required quality check picks a repository readiness command (`true`), + // NOT a skeleton-specific command. AC10's contract is that no + // task-specific skeleton must exist for setup to declare ready. + quality := &profile.Quality{ + ID: "test", + RequiredChecks: []profile.RequiredCheck{ + {ID: "smoke", PreferredCommands: []string{"true"}, Required: true}, + }, + PassPolicy: profile.PassPolicy{MinScore: 1}, + } + tk := setupTask() + // The task carries an acceptance skeleton preflight config that would + // normally drive AcceptanceSkeletonPreflight; the setup preflight must + // finish first and must not depend on any skeleton file having been + // created. + tk.Preflight = &task.Preflight{AcceptanceSkeleton: &task.AcceptanceSkeletonConfig{Enabled: true}} + res, _, err := SetupExecutorPreflight(context.Background(), SetupExecutorPreflightOptions{ + Task: tk, + WorkDir: work, + RunDir: runDir, + Profiles: profile.Bundle{Environment: env, Quality: quality}, + }) + if err != nil { + t.Fatalf("setup preflight: %v", err) + } + if res == nil || res.Status != SetupStatusReady { + t.Fatalf("setup result: %+v", res) + } + // The recorded commands must include the readiness check entry with the + // daemon-only source value `readiness_check` (AC10 + schema fix). + foundReadiness := false + for _, c := range res.Commands { + if c.Source == SetupSourceReadinessCheck { + foundReadiness = true + if c.ExitCode != 0 { + t.Fatalf("readiness check failed: %+v", c) + } + } + if strings.Contains(c.Run, "internal/foo/foo_test.go") { + t.Fatalf("readiness check executed task-specific skeleton command: %+v", c) + } + } + if !foundReadiness { + t.Fatalf("readiness check entry missing from commands: %+v", res.Commands) + } + // The worktree directory must not contain any skeleton files written by + // setup; only the authored plan ran. + entries, _ := os.ReadDir(work) + if len(entries) != 0 { + t.Fatalf("setup left files in worktree: %v", entries) + } +} + +// workdirQuote returns a YAML-safe double-quoted string for a path; t.TempDir +// returns absolute paths so simple Go strconv.Quote is sufficient for tests. +func workdirQuote(s string) string { + // JSON-quote-style escaping is YAML 1.2 compatible for double-quoted + // scalars in our examples. + b, _ := json.Marshal(s) + return string(b) +} + +// TestSetupPreflightReadyWithoutSuccessfulCommandsFailsAndKeepsEnvironmentUnchanged +// is the regression test for the setup-result contract update: a setup +// executor that returns status=ready with no successful_commands cannot +// produce a learned plan, so the daemon must downgrade the result to failed +// (with repair guidance), keep setup_result.json diagnostic, and NOT silently +// leave environment.yaml unchanged behind a fake-passing setup. The previous +// behavior silently skipped persistence and treated the result as ready, +// which violated AC7's "environment.yaml is not silently left unchanged" +// invariant. +func TestSetupPreflightReadyWithoutSuccessfulCommandsFailsAndKeepsEnvironmentUnchanged(t *testing.T) { + work := t.TempDir() + runDir := t.TempDir() + dir := t.TempDir() + // Profile WITHOUT a setup field: this is the path where a learned plan + // would normally be persisted. A pre-existing pr block lets us prove the + // rewrite is suppressed end-to-end (the file content stays byte-identical). + envBody := `id: "no-learned-plan" +cwd: ` + workdirQuote(work) + ` +commands: + test_unit: "go test ./..." +constraints: + network: "approval_required" + secrets_policy: "never_read_env_files" + destructive_commands: "deny" +pr: + enabled: true + base: "main" +` + envPath := writeSetupEnvironmentProfile(t, dir, envBody) + before, err := os.ReadFile(envPath) + if err != nil { + t.Fatal(err) + } + env, err := profile.LoadEnvironment(envPath) + if err != nil { + t.Fatal(err) + } + withSetupExecutorRunner(t, func(_ context.Context, _ SetupExecutorPreflightOptions) (*SetupResult, error) { + // status=ready but empty successful_commands — exactly the contract + // the daemon must reject. + return &SetupResult{ + Status: SetupStatusReady, + Commands: []SetupCommandAttempt{{Run: "echo ok", Source: SetupSourceDiscovered, ExitCode: 0}}, + ReadinessEvidence: "executor claimed ready without reporting commands", + Source: SetupSourceDiscovered, + Provider: "claude", + }, nil + }) + res, update, err := SetupExecutorPreflight(context.Background(), SetupExecutorPreflightOptions{ + Task: setupTask(), + WorkDir: work, + RunDir: runDir, + Profiles: profile.Bundle{Environment: &env}, + EnvironmentProfilePath: envPath, + }) + if err == nil { + t.Fatalf("expected setup preflight error; got res=%+v update=%+v", res, update) + } + if update != nil { + t.Fatalf("update should be nil when no learned plan can be persisted: %+v", update) + } + // environment.yaml must be byte-identical: not silently left unchanged + // behind a ready+empty-plan facade. The contract is "no silent unchanged". + after, err := os.ReadFile(envPath) + if err != nil { + t.Fatal(err) + } + if string(before) != string(after) { + t.Fatalf("environment.yaml was modified despite missing successful_commands:\nbefore=%s\nafter=%s", before, after) + } + // setup_result.json must be diagnostic: status=failed with repair guidance. + data, readErr := os.ReadFile(filepath.Join(runDir, "setup_result.json")) + if readErr != nil { + t.Fatalf("setup_result.json missing: %v", readErr) + } + var saved SetupResult + if err := json.Unmarshal(data, &saved); err != nil { + t.Fatalf("decode setup_result.json: %v", err) + } + if saved.Status != SetupStatusFailed { + t.Fatalf("saved status got %q, want failed", saved.Status) + } + if saved.Error == "" || !strings.Contains(saved.Error, "successful_commands") { + t.Fatalf("saved error must name the contract violation: %q", saved.Error) + } + if saved.RepairGuidance == "" { + t.Fatalf("saved failure missing repair guidance: %+v", saved) + } +} + +// TestSetupResultSchemaMatchesPersistedShape is the JSON/schema validation +// regression for the persisted SetupResult shape. It writes a representative +// SetupResult (covering every field the runtime can serialize today), +// re-loads schemas/setup-result.schema.json, and validates the saved JSON +// against it: required keys are present, no extra keys leak (the schema is +// additionalProperties:false), and enum-constrained fields (status, source, +// provider, per-command source) carry values from the schema's enum lists. +// This pins the schema/runtime sync the previous contract drift would have +// allowed (the runtime persisted provider/source fields that the published +// schema did not declare). +func TestSetupResultSchemaMatchesPersistedShape(t *testing.T) { + runDir := t.TempDir() + res := &SetupResult{ + Status: SetupStatusReady, + Commands: []SetupCommandAttempt{ + {Run: "go mod download", Why: "fetch modules", Source: SetupSourceDiscovered, ExitCode: 0, StdoutExcerpt: "ok", StderrExcerpt: ""}, + {Run: "go build ./...", Why: "readiness verification", Source: SetupSourceReadinessCheck, ExitCode: 0}, + }, + SuccessfulCommands: []profile.SetupCommand{{Run: "go mod download", Why: "fetch modules"}}, + InspectedFiles: []string{"go.mod", "go.sum"}, + ReadinessEvidence: "discovery passed; readiness verified", + Provider: "claude", + Source: SetupSourceDiscovered, + } + if err := WriteSetupResult(runDir, res); err != nil { + t.Fatalf("write setup result: %v", err) + } + savedRaw, err := os.ReadFile(filepath.Join(runDir, "setup_result.json")) + if err != nil { + t.Fatal(err) + } + var saved map[string]any + if err := json.Unmarshal(savedRaw, &saved); err != nil { + t.Fatalf("decode saved: %v", err) + } + schemaPath, err := filepath.Abs("../../schemas/setup-result.schema.json") + if err != nil { + t.Fatal(err) + } + schemaRaw, err := os.ReadFile(schemaPath) + if err != nil { + t.Fatalf("read schema: %v", err) + } + var schema map[string]any + if err := json.Unmarshal(schemaRaw, &schema); err != nil { + t.Fatalf("decode schema: %v", err) + } + if errs := validateAgainstSchemaForTest(saved, schema, "$"); len(errs) > 0 { + t.Fatalf("saved setup_result.json does not match schema:\n %s", strings.Join(errs, "\n ")) + } + // Also assert the schema declares the runtime-persisted fields the + // previous contract drift omitted (provider, source). This guards future + // drift where someone removes one of them from the schema without also + // removing it from the Go struct. + props, _ := schema["properties"].(map[string]any) + for _, requiredKey := range []string{"provider", "source", "successful_commands", "inspected_files", "readiness_evidence", "repair_guidance", "error"} { + if _, ok := props[requiredKey]; !ok { + t.Fatalf("schema missing property %q that the runtime persists", requiredKey) + } + } +} + +// validateAgainstSchemaForTest is a focused JSON-schema walker. It covers +// the constraints the persisted SetupResult schema actually uses today +// (type, required, additionalProperties:false, enum, array items, object +// properties) without pulling in a third-party schema library — Galley's +// go.mod intentionally avoids one. It returns a list of human-readable +// violation messages; an empty slice means the value satisfies the schema. +func validateAgainstSchemaForTest(value any, schema map[string]any, path string) []string { + var errs []string + if typ, ok := schema["type"].(string); ok { + if !jsonTypeMatchesForTest(typ, value) { + errs = append(errs, fmt.Sprintf("%s: type mismatch: want %s, got %T", path, typ, value)) + return errs + } + } + if enum, ok := schema["enum"].([]any); ok { + matched := false + for _, allowed := range enum { + if allowed == value { + matched = true + break + } + } + if !matched { + errs = append(errs, fmt.Sprintf("%s: value %v not in enum %v", path, value, enum)) + } + } + switch v := value.(type) { + case map[string]any: + props, _ := schema["properties"].(map[string]any) + additional, additionalSpecified := schema["additionalProperties"] + if required, ok := schema["required"].([]any); ok { + for _, r := range required { + name, _ := r.(string) + if _, has := v[name]; !has { + errs = append(errs, fmt.Sprintf("%s: missing required property %q", path, name)) + } + } + } + for key, child := range v { + subSchema, declared := props[key].(map[string]any) + if !declared { + if additionalSpecified { + if allow, ok := additional.(bool); ok && !allow { + errs = append(errs, fmt.Sprintf("%s: property %q is not declared in schema and additionalProperties is false", path, key)) + } + } + continue + } + errs = append(errs, validateAgainstSchemaForTest(child, subSchema, path+"."+key)...) + } + case []any: + if items, ok := schema["items"].(map[string]any); ok { + for i, child := range v { + errs = append(errs, validateAgainstSchemaForTest(child, items, fmt.Sprintf("%s[%d]", path, i))...) + } + } + } + return errs +} + +func jsonTypeMatchesForTest(want string, value any) bool { + switch want { + case "object": + _, ok := value.(map[string]any) + return ok + case "array": + _, ok := value.([]any) + return ok + case "string": + _, ok := value.(string) + return ok + case "integer": + // encoding/json decodes numbers as float64; accept whole-number floats. + if f, ok := value.(float64); ok { + return f == float64(int64(f)) + } + return false + case "number": + _, ok := value.(float64) + return ok + case "boolean": + _, ok := value.(bool) + return ok + case "null": + return value == nil + } + return true +} diff --git a/internal/daemon/testmain_test.go b/internal/daemon/testmain_test.go index 1ad6536..ad9c360 100644 --- a/internal/daemon/testmain_test.go +++ b/internal/daemon/testmain_test.go @@ -6,6 +6,7 @@ import ( "testing" "time" + "github.com/shinpr/galley/internal/profile" "github.com/shinpr/galley/internal/retry" ) @@ -15,12 +16,50 @@ import ( // schedule. Retry timing and cancel semantics are covered by the // internal/retry tests; daemon tests only need the retry loop to advance // instantly. +// +// The setupExecutorRunner is also stubbed to a default ready result so the +// majority of daemon tests, which do not exercise the setup executor preflight, +// do not need to spawn a real Claude/Codex subprocess. Tests that exercise the +// setup preflight contract override setupExecutorRunner explicitly via +// withSetupExecutorRunner. func TestMain(m *testing.M) { - restore := retry.SetHooksForTest( + restoreRetry := retry.SetHooksForTest( func(_ context.Context, _ time.Duration) error { return nil }, func() float64 { return 1.0 }, ) + prevSetup := setupExecutorRunner + setupExecutorRunner = defaultTestSetupExecutorRunner code := m.Run() - restore() + setupExecutorRunner = prevSetup + restoreRetry() os.Exit(code) } + +// defaultTestSetupExecutorRunner returns a successful no-op setup result so +// daemon tests that do not exercise the setup executor preflight do not have +// to wire a real subprocess. The result carries an explicit readiness evidence +// string so an accidental dependency would surface clearly. +// +// The stub returns one SuccessfulCommand (`true`) so the learned-plan contract +// enforced by enforceLearnedSetupPlanContract passes: a real setup executor +// that returns status=ready with no successful_commands fails the setup +// phase, and using an empty stub here would either mask that contract or +// inject false-positive successes into daemon tests that exercise the setup +// preflight indirectly through Run. +func defaultTestSetupExecutorRunner(_ context.Context, _ SetupExecutorPreflightOptions) (*SetupResult, error) { + return &SetupResult{ + Status: SetupStatusReady, + Commands: []SetupCommandAttempt{{Run: "true", Source: SetupSourceDiscovered, ExitCode: 0}}, + SuccessfulCommands: []profile.SetupCommand{{Run: "true", Why: "test stub no-op"}}, + ReadinessEvidence: "test stub: setup executor preflight skipped", + Source: SetupSourceDiscovered, + }, nil +} + +// withSetupExecutorRunner installs a setup executor runner for the duration of +// a test. The previous runner is restored when the cleanup fires. +func withSetupExecutorRunner(t interface{ Cleanup(func()) }, runner func(context.Context, SetupExecutorPreflightOptions) (*SetupResult, error)) { + prev := setupExecutorRunner + setupExecutorRunner = runner + t.Cleanup(func() { setupExecutorRunner = prev }) +} diff --git a/internal/profile/profile.go b/internal/profile/profile.go index e3fab3d..75a9990 100644 --- a/internal/profile/profile.go +++ b/internal/profile/profile.go @@ -54,6 +54,28 @@ type Environment struct { Constraints Constraints `yaml:"constraints" json:"constraints"` PR PRSettings `yaml:"pr,omitempty" json:"pr,omitempty"` Worktree WorktreeSettings `yaml:"worktree,omitempty" json:"worktree,omitempty"` + // Setup describes how Galley prepares a fresh task worktree before the + // implementation executor begins. When present, the daemon runs the listed + // commands as the setup phase. When absent, the daemon dispatches a setup + // executor that may discover the successful setup plan and write it back to + // environment.yaml so subsequent tasks reuse the learned setup without + // rediscovery. + Setup *SetupPlan `yaml:"setup,omitempty" json:"setup,omitempty"` +} + +// SetupPlan is the ordered list of commands Galley runs during the setup +// preflight phase. The plan may be authored by the operator or learned by the +// setup executor and persisted back to environment.yaml. +type SetupPlan struct { + Commands []SetupCommand `yaml:"commands" json:"commands"` +} + +// SetupCommand is a single command in a SetupPlan. The optional Why string +// explains the command's purpose so the persisted environment.yaml is readable +// by humans after a learned setup is written back. +type SetupCommand struct { + Run string `yaml:"run" json:"run"` + Why string `yaml:"why,omitempty" json:"why,omitempty"` } type ExecutorDefault struct { @@ -224,6 +246,17 @@ func ValidateEnvironment(env Environment) ValidationResult { if env.PR.Comments.Reply && !env.PR.Comments.Enabled { result.Warnings = append(result.Warnings, "pr.comments.reply is set while pr.comments.enabled is false") } + if env.Setup != nil { + if len(env.Setup.Commands) == 0 { + result.Errors = append(result.Errors, "setup.commands must not be empty when setup is present") + } + for i, cmd := range env.Setup.Commands { + prefix := fmt.Sprintf("setup.commands[%d]", i) + if strings.TrimSpace(cmd.Run) == "" { + result.Errors = append(result.Errors, fmt.Sprintf("%s.run is required", prefix)) + } + } + } return result } @@ -278,6 +311,144 @@ func InferRequiredCheckShellKind(shellPath string) string { return "" } +// UpdateEnvironmentSetup atomically rewrites the setup field of the environment +// YAML at path, preserving unrelated profile content. The rewritten document is +// decoded and validated in memory BEFORE the atomic rename — an invalid setup +// never reaches the published file path, and concurrent readers therefore never +// observe a transiently invalid state. The function returns the prior setup +// plan (nil when absent) so callers can persist a before/after record of the +// change. +func UpdateEnvironmentSetup(path string, plan SetupPlan) (*SetupPlan, error) { + if path == "" { + return nil, fmt.Errorf("environment profile path is required") + } + original, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("read environment profile %s: %w", path, err) + } + // Decode into a generic yaml.Node so unrelated fields, ordering, and + // comments survive the rewrite. Replacing the setup mapping in-place keeps + // the rest of the document untouched. + var root yaml.Node + if err := yaml.Unmarshal(original, &root); err != nil { + return nil, fmt.Errorf("decode environment profile %s: %w", path, err) + } + prior := extractEnvironmentSetup(&root) + if err := replaceEnvironmentSetup(&root, plan); err != nil { + return nil, fmt.Errorf("update environment profile setup: %w", err) + } + var buf bytes.Buffer + enc := yaml.NewEncoder(&buf) + enc.SetIndent(2) + if err := enc.Encode(&root); err != nil { + return nil, fmt.Errorf("encode environment profile %s: %w", path, err) + } + if err := enc.Close(); err != nil { + return nil, fmt.Errorf("flush environment profile %s: %w", path, err) + } + // Validate the rewritten bytes in memory before publication. This is the + // publication boundary: parse the new document with the same KnownFields + // decoder used by LoadEnvironment, then run ValidateEnvironment on the + // decoded value. If either step fails the rewrite is rejected without + // touching the on-disk file at path, so the original file remains the + // canonical state for any reader. + var updated Environment + decoder := yaml.NewDecoder(bytes.NewReader(buf.Bytes())) + decoder.KnownFields(true) + if err := decoder.Decode(&updated); err != nil { + return nil, fmt.Errorf("decode rewritten environment profile: %w", err) + } + if vr := ValidateEnvironment(updated); !vr.Valid() { + return nil, fmt.Errorf("environment profile rewrite failed validation: %s", strings.Join(vr.Errors, "; ")) + } + tmp, err := os.CreateTemp(filepathDir(path), ".environment-setup-*.yaml") + if err != nil { + return nil, fmt.Errorf("stage environment profile rewrite: %w", err) + } + tmpName := tmp.Name() + cleanup := true + defer func() { + if cleanup { + _ = os.Remove(tmpName) + } + }() + if _, err := tmp.Write(buf.Bytes()); err != nil { + tmp.Close() + return nil, fmt.Errorf("write environment profile rewrite: %w", err) + } + if err := tmp.Close(); err != nil { + return nil, fmt.Errorf("close environment profile rewrite: %w", err) + } + if err := os.Rename(tmpName, path); err != nil { + return nil, fmt.Errorf("rename environment profile rewrite: %w", err) + } + cleanup = false + return prior, nil +} + +func extractEnvironmentSetup(root *yaml.Node) *SetupPlan { + mapping := documentMapping(root) + if mapping == nil { + return nil + } + for i := 0; i+1 < len(mapping.Content); i += 2 { + if mapping.Content[i].Value == "setup" { + var plan SetupPlan + if err := mapping.Content[i+1].Decode(&plan); err != nil { + return nil + } + return &plan + } + } + return nil +} + +func replaceEnvironmentSetup(root *yaml.Node, plan SetupPlan) error { + mapping := documentMapping(root) + if mapping == nil { + // Build a fresh mapping when the document was empty. + mapping = &yaml.Node{Kind: yaml.MappingNode} + root.Kind = yaml.DocumentNode + root.Content = []*yaml.Node{mapping} + } + keyNode := &yaml.Node{Kind: yaml.ScalarNode, Tag: "!!str", Value: "setup"} + valueNode := &yaml.Node{} + if err := valueNode.Encode(plan); err != nil { + return err + } + for i := 0; i+1 < len(mapping.Content); i += 2 { + if mapping.Content[i].Value == "setup" { + mapping.Content[i+1] = valueNode + return nil + } + } + mapping.Content = append(mapping.Content, keyNode, valueNode) + return nil +} + +func documentMapping(root *yaml.Node) *yaml.Node { + if root == nil { + return nil + } + node := root + if node.Kind == yaml.DocumentNode && len(node.Content) > 0 { + node = node.Content[0] + } + if node.Kind == yaml.MappingNode { + return node + } + return nil +} + +func filepathDir(path string) string { + for i := len(path) - 1; i >= 0; i-- { + if path[i] == '/' || path[i] == '\\' { + return path[:i] + } + } + return "." +} + func loadYAML(path string, out any) error { data, err := os.ReadFile(path) if err != nil { diff --git a/internal/profile/schema.go b/internal/profile/schema.go index dcb51f7..c7191e3 100644 --- a/internal/profile/schema.go +++ b/internal/profile/schema.go @@ -112,6 +112,18 @@ func EnvironmentJSONSchema() ([]byte, error) { }), defaultValue(map[string]any{"cleanup": true}), ), + "setup": object( + required("commands"), + properties(map[string]any{ + "commands": arraySchema(object( + required("run"), + properties(map[string]any{ + "run": stringSchema("minLength", 1), + "why": stringSchema("minLength", 1), + }), + ), "minItems", 1), + }), + ), }), ) schema["$schema"] = "https://json-schema.org/draft/2020-12/schema" diff --git a/internal/supervisor/adapter.go b/internal/supervisor/adapter.go index c32c793..f832d64 100644 --- a/internal/supervisor/adapter.go +++ b/internal/supervisor/adapter.go @@ -40,19 +40,21 @@ type AdapterRequest struct { // AdapterEvidence is the serializable evidence passed to model supervisors. type AdapterEvidence struct { - Task task.Task `json:"task"` - Profiles profile.Bundle `json:"profiles"` - Claude runner.ClaudeResult `json:"claude"` - ParseError string `json:"parse_error,omitempty"` - RunError string `json:"run_error,omitempty"` - DiffDirty bool `json:"diff_dirty"` - Diff string `json:"diff"` - DiffError string `json:"diff_error,omitempty"` - Attempt int `json:"attempt"` - AttemptsLeft int `json:"attempts_left"` - SourceCWD string `json:"source_cwd,omitempty"` - WorktreeCWD string `json:"worktree_cwd,omitempty"` - PreflightResult any `json:"preflight_result,omitempty"` + Task task.Task `json:"task"` + Profiles profile.Bundle `json:"profiles"` + Claude runner.ClaudeResult `json:"claude"` + ParseError string `json:"parse_error,omitempty"` + RunError string `json:"run_error,omitempty"` + DiffDirty bool `json:"diff_dirty"` + Diff string `json:"diff"` + DiffError string `json:"diff_error,omitempty"` + Attempt int `json:"attempt"` + AttemptsLeft int `json:"attempts_left"` + SourceCWD string `json:"source_cwd,omitempty"` + WorktreeCWD string `json:"worktree_cwd,omitempty"` + PreflightResult any `json:"preflight_result,omitempty"` + SetupResult any `json:"setup_result,omitempty"` + SetupEnvironmentUpdate any `json:"setup_environment_update,omitempty"` } // RunAdapter reviews evidence with a built-in model supervisor. @@ -101,18 +103,20 @@ func RunAdapterPayload(ctx context.Context, opts AdapterOptions, request []byte) // NewAdapterRequest converts in-process evidence into the adapter JSON contract. func NewAdapterRequest(evidence Evidence) AdapterRequest { return AdapterRequest{Evidence: AdapterEvidence{ - Task: evidence.Task, - Profiles: evidence.Profiles, - Claude: evidence.Claude, - ParseError: errorString(evidence.ParseError), - RunError: errorString(evidence.RunError), - DiffDirty: evidence.DiffDirty, - Diff: evidence.Diff, - DiffError: errorString(evidence.DiffError), - Attempt: evidence.Attempt, - AttemptsLeft: evidence.AttemptsLeft, - SourceCWD: evidence.Task.Scope.CWD, - PreflightResult: evidence.PreflightResult, + Task: evidence.Task, + Profiles: evidence.Profiles, + Claude: evidence.Claude, + ParseError: errorString(evidence.ParseError), + RunError: errorString(evidence.RunError), + DiffDirty: evidence.DiffDirty, + Diff: evidence.Diff, + DiffError: errorString(evidence.DiffError), + Attempt: evidence.Attempt, + AttemptsLeft: evidence.AttemptsLeft, + SourceCWD: evidence.Task.Scope.CWD, + PreflightResult: evidence.PreflightResult, + SetupResult: evidence.SetupResult, + SetupEnvironmentUpdate: evidence.SetupEnvironmentUpdate, }} } diff --git a/internal/supervisor/supervisor.go b/internal/supervisor/supervisor.go index dbc944f..dd97c40 100644 --- a/internal/supervisor/supervisor.go +++ b/internal/supervisor/supervisor.go @@ -55,4 +55,10 @@ type Evidence struct { Attempt int AttemptsLeft int PreflightResult any + // SetupResult carries the setup executor preflight outcome (authored or + // learned) so reviewers see the readiness facts that gated the executor. + SetupResult any + // SetupEnvironmentUpdate carries the optional learned-plan profile update + // record, present when Galley rewrote environment.yaml with a learned setup. + SetupEnvironmentUpdate any } diff --git a/plugins/galley/skills/galley/references/environment.schema.json b/plugins/galley/skills/galley/references/environment.schema.json index fc0cb2d..4defe63 100644 --- a/plugins/galley/skills/galley/references/environment.schema.json +++ b/plugins/galley/skills/galley/references/environment.schema.json @@ -117,6 +117,36 @@ }, "type": "object" }, + "setup": { + "additionalProperties": false, + "properties": { + "commands": { + "items": { + "additionalProperties": false, + "properties": { + "run": { + "minLength": 1, + "type": "string" + }, + "why": { + "minLength": 1, + "type": "string" + } + }, + "required": [ + "run" + ], + "type": "object" + }, + "minItems": 1, + "type": "array" + } + }, + "required": [ + "commands" + ], + "type": "object" + }, "supervisor": { "additionalProperties": false, "properties": { diff --git a/plugins/galley/skills/galley/references/troubleshooting.md b/plugins/galley/skills/galley/references/troubleshooting.md index e5cdbc9..7288c11 100644 --- a/plugins/galley/skills/galley/references/troubleshooting.md +++ b/plugins/galley/skills/galley/references/troubleshooting.md @@ -84,6 +84,25 @@ For environment failure: 1. Record the missing command, secret, service, or permission. 2. Fix setup or mark as `needs_supervisor_review` if human action is required. +## Setup failures (phase=setup, kind=setup_failed) + +A `setup_failed` attempt means Galley could not make the task worktree ready before the implementation executor ran. Inspect the run directory: + +- `runs//setup_result.json` — source-of-truth setup evidence. Read these fields first: + - `status`: `ready` or `failed`. + - `commands[]`: every command Galley attempted, with `source` (`environment_setup`, `environment_commands`, `readiness_check`, or `discovered`), `exit_code`, and stdout/stderr excerpts. The full output remains in the per-command `setup_authored.N.{stdout,stderr}.log`, `setup_readiness_check.{stdout,stderr}.log`, or `setup_executor.{stdout.jsonl,stderr.log}` files. + - `source`: which command list produced the final attempt (`environment_setup` for an authored plan, `discovered` for a learned plan). + - `inspected_files[]`: repository signals the setup executor read (manifests, lockfiles, setup docs). + - `repair_guidance`: actionable guidance for fixing `environment.setup` or rerunning discovery. +- `runs//environment_update.json` — present only when Galley persisted a learned setup plan back to `environment.yaml`. Audit fields: `profile_path`, `before` (prior setup plan or null), `after` (new plan), `reason`, and `updated_at`. When a previously learned plan later fails, compare `before`/`after` to confirm which commands changed and decide whether to repair `environment.setup` by hand or to delete the `setup` field and let Galley rediscover. + +Repair flow: + +1. Read `setup_result.json` and identify the failing command (the last attempt with non-zero `exit_code`). +2. Inspect the matching `setup_authored.N.stderr.log` or `setup_executor.stderr.log`. +3. Edit `environment.yaml setup.commands` to fix the command, or remove the `setup` field entirely to let Galley discover and persist a new plan on the next run. +4. Requeue the task. + ## Report Format ```markdown diff --git a/prompts/embed.go b/prompts/embed.go index ad40312..061678d 100644 --- a/prompts/embed.go +++ b/prompts/embed.go @@ -23,6 +23,12 @@ var acceptanceSkeletonCreator string //go:embed acceptance-skeleton-creator-codex.md var acceptanceSkeletonCreatorCodex string +//go:embed setup-executor.md +var setupExecutorClaude string + +//go:embed setup-executor-codex.md +var setupExecutorCodex string + // ClaudeExecutorFull returns the built-in Claude executor system prompt. func ClaudeExecutorFull() string { return claudeExecutorFull @@ -57,3 +63,21 @@ func CodexSupervisor() string { func ClaudeSupervisor() string { return supervisorReviewCommon + "\n\n" + claudeSupervisorReview } + +// SetupExecutorClaude returns the built-in setup executor system prompt for +// the Claude provider. The prompt is authored from Claude executor, Claude +// supervisor, and acceptance-skeleton-creator conventions: Claude Code tool +// guidance, step-numbered workflow, and a single JSON object as the final +// assistant response. +func SetupExecutorClaude() string { + return setupExecutorClaude +} + +// SetupExecutorCodex returns the built-in setup executor system prompt for the +// Codex provider. The prompt is authored from Codex executor, Codex supervisor, +// and Codex acceptance-skeleton-creator conventions: Codex shell/file tool +// guidance, source-priority framing, and the final assistant message captured +// through Codex `--output-last-message`. +func SetupExecutorCodex() string { + return setupExecutorCodex +} diff --git a/prompts/setup-executor-codex.md b/prompts/setup-executor-codex.md new file mode 100644 index 0000000..7fd09cb --- /dev/null +++ b/prompts/setup-executor-codex.md @@ -0,0 +1,67 @@ +# Role + +You are the Galley setup executor running inside Codex. + +Prepare the fresh task worktree so the implementation executor can start immediately. You are NOT implementing acceptance criteria. Your job is to install dependencies, fetch tooling, and verify the repository's standard build/test commands actually run in this worktree. Return one JSON object that matches the configured setup executor result schema. + +# Inputs + +The work order message is one JSON object with these top-level keys: + +- `task`: authoritative task YAML. Only `scope.cwd` and metadata are used during setup. +- `environment`: resolved environment profile (`commands`, `constraints`, `executor`, `setup` when present, `cwd`). +- `quality`: resolved quality profile required checks. Use these to pick what proves readiness. +- `repository_signals`: declared paths Galley already inspected (manifests, lockfiles, setup docs). +- `worktree`: absolute path to the worktree you are preparing. + +# Source Priority + +1. `environment.setup` when present. Run those commands first; if they all succeed and a representative quality required check passes, return the unchanged plan as `successful_commands`. +2. `environment.commands` named like `setup`, `install`, `bootstrap`, `deps`, `build`, `test_unit`. +3. Repository setup docs, package manifests, lockfiles surfaced in `repository_signals`. +4. Repository conventions discovered in the worktree. + +You may discover and return a different successful plan only when the supplied commands do not make the worktree ready. + +# Tool And Write Rules + +- Use Codex shell and file tools to inspect manifests, lockfiles, Makefiles, scripts/, and setup-related README sections. +- Run setup commands with the Codex shell tool from inside the worktree. Capture stdout/stderr; record exit codes for every attempt. +- Do not modify source files. Cache and build directories the project's setup expects are allowed. +- Stay inside the worktree. Never touch `.git`. Never run destructive commands. Treat `.env` files as never readable. + +# Workflow + +## Step 1. Read The Contract + +- Identify whether `environment.setup` is present and list its commands. +- Detect language(s) and package manager(s) from manifests in `repository_signals`. +- Pick a quality required check that proves the build/test surface. + +## Step 2. Try The Authored Plan + +If `environment.setup.commands` exists, run each command in order. Record every attempt in `commands[]` with `source: "environment_setup"`. + +If every authored command succeeds and the chosen quality required check passes, set `status: "ready"` and copy the authored plan into `successful_commands`. Return the result. + +If any authored command fails, record the failure and proceed to Step 3. + +## Step 3. Discover + +Build the smallest ordered sequence that brings the worktree from a fresh clone to a state where a quality required check passes. Prefer commands already present in `environment.commands`. Each command goes into `commands[]` with `source` set to `environment_commands` when reused from the commands map or `discovered` when composed from repository signals. Never author entries with `source: "readiness_check"` — that value is reserved for the daemon's authored-plan readiness verification. + +## Step 4. Verify Readiness + +Run at least one quality required check (or its closest available equivalent). Cite that command's exit code and a short stdout excerpt in `readiness_evidence`. + +## Step 5. Return Result + +Return exactly one JSON object that matches the configured schema. `successful_commands` must be the ordered minimal plan that would make a fresh worktree ready if rerun. Include `why` strings so persisted environment.yaml stays human-readable. + +If you cannot make the worktree ready, set `status: "failed"`, write a terse `error`, fill `repair_guidance` with concrete next steps, and still return the attempted `commands[]` for operator diagnosis. + +# Setup-Specific Rules + +- Setup readiness EXCLUDES acceptance skeleton obligations. Do not fail because a task-specific test skeleton has not been implemented yet. +- Keep `commands[].stdout_excerpt` and `stderr_excerpt` short (final 200-400 characters at most). +- The setup executor result JSON is the only authoritative output. Print it as the final assistant message so Codex captures it through `--output-last-message`. diff --git a/prompts/setup-executor.md b/prompts/setup-executor.md new file mode 100644 index 0000000..c86519c --- /dev/null +++ b/prompts/setup-executor.md @@ -0,0 +1,73 @@ +# Role + +You are the Galley setup executor running inside Claude Code. + +Make the fresh task worktree ready for the implementation executor that runs next. You are NOT implementing the acceptance criteria — your job is to install dependencies, fetch tooling, and verify the repository's standard build/test commands run, so the implementation executor can start immediately. + +Finish with a valid Galley setup executor result JSON object. + +# Inputs + +The user message is one JSON object with these top-level keys: + +- `task`: the authoritative task YAML. Only `scope.cwd` and metadata are used during setup — acceptance criteria are NOT part of setup readiness. +- `environment`: the resolved environment profile, including `commands`, `constraints`, `executor`, `setup` (when present), and `cwd`. +- `quality`: the resolved quality profile required checks. Use these to decide which commands prove readiness. +- `repository_signals`: declared paths Galley already inspected (manifests, lockfiles, setup docs). +- `worktree`: the absolute path to the worktree you are preparing. + +# Authority And Source Of Truth + +Use this priority order: + +1. `environment.setup` when present. Run those commands first; if they all succeed and the worktree is ready, return them as the successful plan. +2. `environment.commands` named like `setup`, `install`, `bootstrap`, `deps`, `build`, `test_unit`. Prefer the smallest combination that proves the build/test surface works. +3. Repository setup docs, package manifests, and lockfiles surfaced in `repository_signals`. +4. Repository conventions discovered in the worktree. + +When `environment.setup` runs cleanly you must return it unchanged as `successful_commands`. Only discover and return a different plan when the supplied commands do not make the worktree ready. + +# Claude Code Tool Policy + +- Search and read tools: inspect manifests (package.json, go.mod, pyproject.toml, Cargo.toml), lockfiles, Makefile, scripts/, .tool-versions, and README sections that mention setup. +- Bash tool: run setup commands inside the worktree. Capture stdout/stderr; record the exit code for every attempt in `commands[]`. +- Edit/write tools: avoid writing source files. You may create cache or build directories that the project's setup expects. + +# Workflow + +## Step 1. Read The Contract [BLOCKING] + +Identify: + +- Whether `environment.setup` is present. +- Repository language(s) and package manager(s) from manifests. +- Required check commands from `quality.required_checks` that prove the build/test surface. + +## Step 2. Try The Authored Plan [BLOCKING WHEN PRESENT] + +If `environment.setup.commands` is present, run each command in order inside the worktree. Record every attempt in `commands[]` with `source: "environment_setup"`. + +If every command succeeds and a chosen quality required check passes, you are done — set `status: "ready"` and copy the authored plan into `successful_commands`. + +If any authored command fails, record the failure, then continue to Step 3 to discover a working plan. + +## Step 3. Discover [WHEN AUTHORED PLAN MISSING OR INSUFFICIENT] + +Build the smallest sequence of commands that brings the worktree from a fresh clone to a state where a representative quality required check passes. Prefer commands that already exist in `environment.commands`. Each command goes into `commands[]` with `source` set to `environment_commands` when it came from the commands map or `discovered` when you composed it from repository signals. Do NOT author entries with `source: "readiness_check"` — that value is reserved for the daemon's own readiness verification when it runs an authored `environment.setup` plan. + +## Step 4. Verify Readiness [BLOCKING] + +Run at least one quality required check (or its closest available equivalent) to prove the worktree is ready. Cite that command's exit code and a short excerpt in `readiness_evidence`. + +## Step 5. Return Result [BLOCKING] + +Return exactly one JSON object matching the configured schema. `successful_commands` must be the ordered minimal plan that, if rerun on a fresh worktree, would make it ready. Include `why` strings so the persisted environment.yaml stays human-readable. + +If you cannot make the worktree ready, set `status: "failed"`, fill `error` with the terse failure, fill `repair_guidance` with concrete next steps, and still return the attempted `commands[]` so the operator can diagnose. + +# Setup-Specific Rules + +- Setup readiness excludes acceptance skeleton obligations. Do NOT fail because a task-specific skeleton test has not been implemented yet. +- Treat secrets as never readable from .env files. If a required dependency needs credentials that are not present, set `status: "failed"` with repair guidance. +- Never run destructive commands. Never modify `.git`. Stay inside the worktree. +- Keep `commands[].stdout_excerpt` and `stderr_excerpt` short (the final 200-400 characters at most). diff --git a/schemas/embed.go b/schemas/embed.go index 7164805..98cdb6d 100644 --- a/schemas/embed.go +++ b/schemas/embed.go @@ -10,3 +10,6 @@ var ClaudeResult string //go:embed acceptance-skeleton-manifest.schema.json var AcceptanceSkeletonManifest string + +//go:embed setup-result.schema.json +var SetupResult string diff --git a/schemas/setup-result.schema.json b/schemas/setup-result.schema.json new file mode 100644 index 0000000..d0d9413 --- /dev/null +++ b/schemas/setup-result.schema.json @@ -0,0 +1,75 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "Galley Setup Executor Result", + "type": "object", + "additionalProperties": false, + "required": ["status", "commands"], + "properties": { + "status": { + "type": "string", + "enum": ["ready", "failed"], + "description": "Whether the worktree is ready for the implementation executor." + }, + "commands": { + "type": "array", + "description": "Ordered command attempts the setup executor made. Each entry is one command that was actually executed, in order, with the captured outcome. The successful_commands subset is the persistable setup plan when status is ready.", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["run", "source", "exit_code"], + "properties": { + "run": { "type": "string", "minLength": 1 }, + "why": { "type": "string" }, + "source": { + "type": "string", + "enum": ["environment_setup", "environment_commands", "discovered", "readiness_check"], + "description": "Where the executor took this command from. `readiness_check` is reserved for the daemon-authored readiness verification it runs after an authored environment.setup plan succeeds; setup executors should not author readiness_check entries." + }, + "exit_code": { "type": "integer" }, + "stdout_excerpt": { "type": "string" }, + "stderr_excerpt": { "type": "string" } + } + } + }, + "successful_commands": { + "type": "array", + "description": "The ordered subset of commands that made the worktree ready. When status is ready this is the setup plan that should be persisted to environment.yaml when the profile lacked a setup field or the learned plan differs.", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["run"], + "properties": { + "run": { "type": "string", "minLength": 1 }, + "why": { "type": "string" } + } + } + }, + "inspected_files": { + "type": "array", + "description": "Files the setup executor inspected to plan the setup, such as package manifests, lockfiles, and setup docs.", + "items": { "type": "string", "minLength": 1 } + }, + "readiness_evidence": { + "type": "string", + "description": "One or two sentences explaining why the executor concluded the worktree is ready, citing the verification it ran." + }, + "repair_guidance": { + "type": "string", + "description": "When status is failed, concrete next steps the operator can take to repair environment.setup." + }, + "error": { + "type": "string", + "description": "When status is failed, the terse failure summary." + }, + "provider": { + "type": "string", + "enum": ["claude", "codex"], + "description": "Which executor provider ran the setup attempt. The daemon sets this from task.executor.cli when it dispatches the setup executor. Authored-only runs that never dispatched an executor omit this field." + }, + "source": { + "type": "string", + "enum": ["environment_setup", "environment_commands", "discovered", "readiness_check"], + "description": "Where the canonical plan in this result came from. `environment_setup` means the authored environment.setup plan ran. `discovered` means the setup executor discovered the plan. `environment_commands` and `readiness_check` are reserved overlays kept in sync with the per-command `source` enum so a SetupResult.source can carry the same vocabulary." + } + } +} From 44918362f1bb880c558eff13f270b2fbbd7876b7 Mon Sep 17 00:00:00 2001 From: Shinsuke Kagawa Date: Mon, 25 May 2026 21:18:14 +0900 Subject: [PATCH 2/6] Harden setup executor command execution --- internal/daemon/setup_executor_preflight.go | 118 ++++++++- .../daemon/setup_executor_preflight_test.go | 67 +++++ internal/profile/profile.go | 21 ++ internal/profile/profile_test.go | 28 ++ internal/profile/schema.go | 4 +- internal/profilecmd/shell.go | 232 ++++++++++++++++ internal/profilecmd/shell_test.go | 90 +++++++ internal/result/result.go | 248 +----------------- .../scripts/require-final-json.py | 65 +++++ .../galley/references/environment.schema.json | 2 + prompts/setup-executor-codex.md | 1 + schemas/setup-result.schema.json | 20 +- 12 files changed, 636 insertions(+), 260 deletions(-) create mode 100644 internal/profilecmd/shell.go create mode 100644 internal/profilecmd/shell_test.go diff --git a/internal/daemon/setup_executor_preflight.go b/internal/daemon/setup_executor_preflight.go index fb65c2f..90a5861 100644 --- a/internal/daemon/setup_executor_preflight.go +++ b/internal/daemon/setup_executor_preflight.go @@ -30,12 +30,13 @@ import ( "errors" "fmt" "os" - "os/exec" "path/filepath" + "runtime" "strings" "time" "github.com/shinpr/galley/internal/profile" + "github.com/shinpr/galley/internal/profilecmd" "github.com/shinpr/galley/internal/runner" claudeguard "github.com/shinpr/galley/internal/runner/claude_guard_plugin" "github.com/shinpr/galley/internal/task" @@ -98,10 +99,18 @@ type SetupEnvironmentUpdate struct { Changed bool `json:"changed"` Before *profile.SetupPlan `json:"before,omitempty"` After profile.SetupPlan `json:"after"` + Diff string `json:"diff,omitempty"` Reason string `json:"reason"` UpdatedAt string `json:"updated_at"` } +const ( + maxSetupResultCommands = 50 + maxSetupResultFiles = 100 + maxSetupResultExcerptLength = 400 + maxSetupResultTextLength = 2048 +) + // SetupExecutorPreflightOptions configures one preflight invocation. The // EnvironmentProfilePath is required when the daemon should persist a learned // setup plan back to the repository environment profile (AC7). @@ -271,7 +280,23 @@ func runAuthoredSetupPlan(ctx context.Context, opts SetupExecutorPreflightOption Source: SetupSourceEnvironmentSetup, } for i, cmd := range plan.Commands { - argv := []string{shellExecutable(opts.Profiles), "-c", cmd.Run} + argv, cleanup, _, err := setupShellArgv(opts, cmd.Run, fmt.Sprintf("setup_authored.%d", i+1)) + if cleanup != nil { + defer cleanup() + } + if err != nil { + attempt := SetupCommandAttempt{ + Run: cmd.Run, + Why: cmd.Why, + Source: SetupSourceEnvironmentSetup, + ExitCode: -1, + } + res.Commands = append(res.Commands, attempt) + res.Status = SetupStatusFailed + res.Error = fmt.Sprintf("authored setup command %d shell resolution failed: %v", i+1, err) + res.RepairGuidance = "Inspect required_checks.shell and required_checks.shell_path in environment.yaml, fix the setup shell configuration, and requeue." + return res, fmt.Errorf("setup phase failed: %s", res.Error) + } command := runner.Command{Argv: argv, WorkDir: opts.WorkDir} stdoutPath := filepath.Join(opts.RunDir, fmt.Sprintf("setup_authored.%d.stdout.log", i+1)) stderrPath := filepath.Join(opts.RunDir, fmt.Sprintf("setup_authored.%d.stderr.log", i+1)) @@ -323,7 +348,22 @@ func runAuthoredReadinessCheck(ctx context.Context, opts SetupExecutorPreflightO } return } - argv := []string{shellExecutable(opts.Profiles), "-c", command} + argv, cleanup, _, shellErr := setupShellArgv(opts, command, "setup_readiness_check") + if cleanup != nil { + defer cleanup() + } + if shellErr != nil { + res.Status = SetupStatusFailed + res.Error = fmt.Sprintf("authored setup readiness check '%s' shell resolution failed: %v", check, shellErr) + res.RepairGuidance = "Inspect required_checks.shell and required_checks.shell_path in environment.yaml, fix the setup shell configuration, and requeue." + res.Commands = append(res.Commands, SetupCommandAttempt{ + Run: command, + Why: "required-check readiness verification (" + check + ")", + Source: SetupSourceReadinessCheck, + ExitCode: -1, + }) + return + } stdoutPath := filepath.Join(opts.RunDir, "setup_readiness_check.stdout.log") stderrPath := filepath.Join(opts.RunDir, "setup_readiness_check.stderr.log") out, err := runner.RunCommand(ctx, runner.Command{Argv: argv, WorkDir: opts.WorkDir}, runner.RunOptions{ @@ -546,11 +586,36 @@ func persistLearnedSetupPlan(opts SetupExecutorPreflightOptions, env *profile.En Changed: true, Before: prior, After: plan, + Diff: setupPlanDiff(prior, plan), Reason: reason, UpdatedAt: time.Now().UTC().Format(time.RFC3339Nano), }, nil } +func setupPlanDiff(before *profile.SetupPlan, after profile.SetupPlan) string { + var b strings.Builder + b.WriteString("environment.setup.commands\n") + if before == nil || len(before.Commands) == 0 { + b.WriteString("- \n") + } else { + for _, cmd := range before.Commands { + fmt.Fprintf(&b, "- run: %q", cmd.Run) + if cmd.Why != "" { + fmt.Fprintf(&b, " why: %q", cmd.Why) + } + b.WriteString("\n") + } + } + for _, cmd := range after.Commands { + fmt.Fprintf(&b, "+ run: %q", cmd.Run) + if cmd.Why != "" { + fmt.Fprintf(&b, " why: %q", cmd.Why) + } + b.WriteString("\n") + } + return b.String() +} + func setupPlansEqual(a, b profile.SetupPlan) bool { if len(a.Commands) != len(b.Commands) { return false @@ -579,12 +644,40 @@ func WriteSetupResult(runDir string, res *SetupResult) error { if res == nil { return nil } + normalizeSetupResult(res) if err := os.MkdirAll(runDir, 0o700); err != nil { return fmt.Errorf("create setup run dir: %w", err) } return writeJSON(filepath.Join(runDir, "setup_result.json"), res) } +func normalizeSetupResult(res *SetupResult) { + if res == nil { + return + } + res.ReadinessEvidence = truncateString(res.ReadinessEvidence, maxSetupResultTextLength) + res.RepairGuidance = truncateString(res.RepairGuidance, maxSetupResultTextLength) + res.Error = truncateString(res.Error, maxSetupResultTextLength) + if len(res.Commands) > maxSetupResultCommands { + res.Commands = res.Commands[:maxSetupResultCommands] + } + for i := range res.Commands { + res.Commands[i].Run = truncateString(res.Commands[i].Run, profile.MaxSetupCommandRunLength) + res.Commands[i].Why = truncateString(res.Commands[i].Why, profile.MaxSetupCommandWhyLength) + res.Commands[i].StdoutExcerpt = truncateExcerpt(res.Commands[i].StdoutExcerpt) + res.Commands[i].StderrExcerpt = truncateExcerpt(res.Commands[i].StderrExcerpt) + } + if len(res.SuccessfulCommands) > maxSetupResultCommands { + res.SuccessfulCommands = res.SuccessfulCommands[:maxSetupResultCommands] + } + if len(res.InspectedFiles) > maxSetupResultFiles { + res.InspectedFiles = res.InspectedFiles[:maxSetupResultFiles] + } + for i := range res.InspectedFiles { + res.InspectedFiles[i] = truncateString(res.InspectedFiles[i], 512) + } +} + // LoadSetupEnvironmentUpdate reads the persisted environment_update.json. // Returns (nil, nil) when the file does not exist so callers can probe // unconditionally. @@ -685,14 +778,13 @@ func WriteSetupEnvironmentUpdate(runDir string, update *SetupEnvironmentUpdate) return writeJSON(filepath.Join(runDir, "environment_update.json"), update) } -func shellExecutable(_ profile.Bundle) string { - if path, err := exec.LookPath("bash"); err == nil { - return path - } - if path, err := exec.LookPath("sh"); err == nil { - return path +func setupShellArgv(opts SetupExecutorPreflightOptions, command, name string) ([]string, func(), string, error) { + shell := profile.RequiredCheckEnvironment{} + if opts.Profiles.Environment != nil { + shell = opts.Profiles.Environment.RequiredChecks } - return "/bin/sh" + scratchDir := filepath.Join(opts.RunDir, "setup-shell", name) + return profilecmd.ShellArgvForOS(runtime.GOOS, command, scratchDir, shell) } func setupCommandTimeout(t task.Task) time.Duration { @@ -703,8 +795,11 @@ func setupCommandTimeout(t task.Task) time.Duration { } func truncateExcerpt(s string) string { - const max = 400 s = strings.TrimSpace(s) + return truncateString(s, maxSetupResultExcerptLength) +} + +func truncateString(s string, max int) string { if len(s) <= max { return s } @@ -910,5 +1005,6 @@ func parseSetupResultRaw(data []byte) (*SetupResult, bool) { if res.Status == "" || res.Commands == nil { return nil, false } + normalizeSetupResult(&res) return &res, true } diff --git a/internal/daemon/setup_executor_preflight_test.go b/internal/daemon/setup_executor_preflight_test.go index ca2393c..3b12968 100644 --- a/internal/daemon/setup_executor_preflight_test.go +++ b/internal/daemon/setup_executor_preflight_test.go @@ -6,6 +6,7 @@ import ( "fmt" "os" "path/filepath" + "runtime" "strings" "testing" @@ -242,6 +243,66 @@ func TestSetupPreflightAbsentSetupInvokesExecutorWithEnvironmentAndSignals(t *te } } +// TestSetupPreflightAuthoredPlanUsesRequiredCheckShellPath proves the authored +// setup path and the daemon-authored readiness check share the same +// profile-owned shell selection contract as required checks. A prior regression +// used a setup-only bash/sh lookup and ignored required_checks.shell_path. +func TestSetupPreflightAuthoredPlanUsesRequiredCheckShellPath(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("uses a POSIX fake shell script") + } + work := t.TempDir() + runDir := t.TempDir() + fakeShell := filepath.Join(t.TempDir(), "bash") + marker := filepath.Join(t.TempDir(), "shell.invoked") + if err := os.WriteFile(fakeShell, []byte("#!/bin/sh\nprintf invoked >> "+marker+"\nexec /bin/sh \"$@\"\n"), 0o700); err != nil { + t.Fatal(err) + } + env := &profile.Environment{ + ID: "shell-path", + CWD: work, + RequiredChecks: profile.RequiredCheckEnvironment{ + ShellPath: fakeShell, + }, + Setup: &profile.SetupPlan{Commands: []profile.SetupCommand{ + {Run: "printf setup > setup.proof", Why: "prove setup shell"}, + }}, + Constraints: profile.Constraints{ + Network: "approval_required", + SecretsPolicy: "never_read_env_files", + DestructiveCommands: "deny", + }, + } + quality := &profile.Quality{ + ID: "quality", + RequiredChecks: []profile.RequiredCheck{{ + ID: "proof", + PreferredCommands: []string{"test -f setup.proof"}, + Required: true, + }}, + PassPolicy: profile.PassPolicy{MinScore: 1}, + } + res, _, err := SetupExecutorPreflight(context.Background(), SetupExecutorPreflightOptions{ + Task: setupTask(), + WorkDir: work, + RunDir: runDir, + Profiles: profile.Bundle{Environment: env, Quality: quality}, + }) + if err != nil { + t.Fatalf("setup preflight: %v", err) + } + if res == nil || res.Status != SetupStatusReady { + t.Fatalf("setup result: %+v", res) + } + data, err := os.ReadFile(marker) + if err != nil { + t.Fatalf("fake shell marker missing: %v", err) + } + if got := strings.Count(string(data), "invoked"); got != 2 { + t.Fatalf("fake shell invocations got %d, want setup command and readiness check", got) + } +} + // TestSetupExecutorCommandPlanClaudeAndCodex proves part of AC4: building the // setup executor command plan for both Claude and Codex returns provider-shaped // argv that embeds the bin path and references the canonical capture path each @@ -291,6 +352,9 @@ func TestSetupExecutorCommandPlanClaudeAndCodex(t *testing.T) { if len(codexPlan.Argv) == 0 || codexPlan.Argv[0] != "/path/to/codex" { t.Fatalf("codex argv[0] got %v", codexPlan.Argv) } + if len(codexPlan.Env) == 0 { + t.Fatalf("codex setup executor must run with runner restricted env") + } joined := strings.Join(codexPlan.Argv, " ") if !strings.Contains(joined, "--output-last-message") { t.Fatalf("codex argv missing --output-last-message: %v", codexPlan.Argv) @@ -469,6 +533,9 @@ pr: if update1 == nil || !update1.Changed { t.Fatalf("first run should have persisted plan: %+v", update1) } + if !strings.Contains(update1.Diff, "+ run: \"echo learned\"") { + t.Fatalf("environment update missing setup diff: %+v", update1) + } // Verify the file was atomically rewritten with the new setup AND unrelated // content is preserved. diff --git a/internal/profile/profile.go b/internal/profile/profile.go index 75a9990..d2cae70 100644 --- a/internal/profile/profile.go +++ b/internal/profile/profile.go @@ -78,6 +78,11 @@ type SetupCommand struct { Why string `yaml:"why,omitempty" json:"why,omitempty"` } +const ( + MaxSetupCommandRunLength = 4096 + MaxSetupCommandWhyLength = 1024 +) + type ExecutorDefault struct { DefaultCLI string `yaml:"default_cli,omitempty" json:"default_cli,omitempty"` } @@ -255,11 +260,27 @@ func ValidateEnvironment(env Environment) ValidationResult { if strings.TrimSpace(cmd.Run) == "" { result.Errors = append(result.Errors, fmt.Sprintf("%s.run is required", prefix)) } + validateSetupCommandText(&result, prefix+".run", cmd.Run, MaxSetupCommandRunLength) + if cmd.Why != "" { + validateSetupCommandText(&result, prefix+".why", cmd.Why, MaxSetupCommandWhyLength) + } } } return result } +func validateSetupCommandText(result *ValidationResult, field, value string, max int) { + if len(value) > max { + result.Errors = append(result.Errors, fmt.Sprintf("%s must be at most %d bytes", field, max)) + } + for _, r := range value { + if r < 0x20 && r != '\n' && r != '\r' && r != '\t' { + result.Errors = append(result.Errors, fmt.Sprintf("%s must not contain control characters other than tab or newline", field)) + return + } + } +} + func validExecutorCLI(value string) bool { switch value { case "claude", "codex": diff --git a/internal/profile/profile_test.go b/internal/profile/profile_test.go index 0a93c24..0fe046b 100644 --- a/internal/profile/profile_test.go +++ b/internal/profile/profile_test.go @@ -175,6 +175,34 @@ func TestValidateEnvironmentRejectsInvalidRequiredCheckShell(t *testing.T) { } } +func TestValidateEnvironmentRejectsUnsafeSetupCommandTextShape(t *testing.T) { + env := validEnvironmentForTest() + env.Setup = &SetupPlan{Commands: []SetupCommand{{ + Run: strings.Repeat("x", MaxSetupCommandRunLength+1), + Why: "too long run", + }}} + result := ValidateEnvironment(env) + if result.Valid() { + t.Fatal("expected oversized setup command to be invalid") + } + if !strings.Contains(strings.Join(result.Errors, "\n"), "setup.commands[0].run") { + t.Fatalf("expected setup command run error, got %#v", result.Errors) + } + + env = validEnvironmentForTest() + env.Setup = &SetupPlan{Commands: []SetupCommand{{ + Run: "printf ok\x00", + Why: "contains nul", + }}} + result = ValidateEnvironment(env) + if result.Valid() { + t.Fatal("expected setup command with control char to be invalid") + } + if !strings.Contains(strings.Join(result.Errors, "\n"), "control characters") { + t.Fatalf("expected control character error, got %#v", result.Errors) + } +} + func validEnvironmentForTest() Environment { return Environment{ ID: "local", diff --git a/internal/profile/schema.go b/internal/profile/schema.go index c7191e3..ea439b5 100644 --- a/internal/profile/schema.go +++ b/internal/profile/schema.go @@ -118,8 +118,8 @@ func EnvironmentJSONSchema() ([]byte, error) { "commands": arraySchema(object( required("run"), properties(map[string]any{ - "run": stringSchema("minLength", 1), - "why": stringSchema("minLength", 1), + "run": stringSchema("minLength", 1, "maxLength", MaxSetupCommandRunLength), + "why": stringSchema("minLength", 1, "maxLength", MaxSetupCommandWhyLength), }), ), "minItems", 1), }), diff --git a/internal/profilecmd/shell.go b/internal/profilecmd/shell.go new file mode 100644 index 0000000..8a48bce --- /dev/null +++ b/internal/profilecmd/shell.go @@ -0,0 +1,232 @@ +package profilecmd + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/shinpr/galley/internal/profile" +) + +// LookupFunc and StatFunc let tests exercise shell discovery without relying +// on the host machine's PATH or filesystem. +type LookupFunc func(string) (string, error) +type StatFunc func(string) (os.FileInfo, error) + +// Resolver controls the host interactions used by ShellArgvForOS. +type Resolver struct { + LookPath LookupFunc + StatFile StatFunc +} + +func (r Resolver) withDefaults() Resolver { + if r.LookPath == nil { + r.LookPath = exec.LookPath + } + if r.StatFile == nil { + r.StatFile = os.Stat + } + return r +} + +// ShellArgvForOS returns the argv for executing a profile-owned shell command, +// plus an optional cleanup function for materialized script files and the +// resolved shell kind. It is shared by required checks and environment.setup so +// both surfaces honor required_checks.shell and required_checks.shell_path. +func ShellArgvForOS(goos, command, scratchDir string, shell profile.RequiredCheckEnvironment) ([]string, func(), string, error) { + return ShellArgvForOSWithResolver(goos, command, scratchDir, shell, Resolver{}) +} + +// ShellArgvForOSWithResolver is ShellArgvForOS with injectable host lookups. +func ShellArgvForOSWithResolver(goos, command, scratchDir string, shell profile.RequiredCheckEnvironment, resolver Resolver) ([]string, func(), string, error) { + resolver = resolver.withDefaults() + resolved, err := resolveShellForOS(goos, shell.Shell, shell.ShellPath, resolver) + if err != nil { + return nil, nil, "", err + } + if goos != "windows" { + return shellArgv(resolved, command), nil, resolved.Kind, nil + } + dir := scratchDir + cleanup := func() {} + if dir == "" { + tmp, err := os.MkdirTemp("", "galley-windows-check-*") + if err != nil { + return nil, nil, "", fmt.Errorf("create windows verification script dir: %w", err) + } + dir = tmp + cleanup = func() { _ = os.RemoveAll(tmp) } + } else if err := os.MkdirAll(dir, 0o700); err != nil { + return nil, nil, "", fmt.Errorf("create windows verification script dir %s: %w", dir, err) + } + ext := ".cmd" + body := []byte("@echo off\r\n" + command + "\r\n") + if resolved.Kind == "bash" { + ext = ".sh" + body = []byte("#!/usr/bin/env bash\nset -e\n" + command + "\n") + } else if resolved.Kind == "sh" { + ext = ".sh" + body = []byte("set -e\n" + command + "\n") + } else if resolved.Kind == "powershell" || resolved.Kind == "pwsh" { + ext = ".ps1" + body = []byte("$ErrorActionPreference = 'Stop'\r\n" + command + "\r\n") + } + scriptPath := filepath.Join(dir, "galley-verification"+ext) + if err := os.WriteFile(scriptPath, body, 0o600); err != nil { + cleanup() + return nil, nil, "", fmt.Errorf("write windows verification script %s: %w", scriptPath, err) + } + return shellScriptArgv(resolved, scriptPath), cleanup, resolved.Kind, nil +} + +type resolvedShell struct { + Kind string + Bin string +} + +func resolveShellForOS(goos, configured, configuredPath string, resolver Resolver) (resolvedShell, error) { + if configuredPath != "" { + kind := profile.InferRequiredCheckShellKind(configuredPath) + if kind == "" { + switch configured { + case "sh", "bash", "cmd", "powershell", "pwsh": + kind = configured + default: + return resolvedShell{}, fmt.Errorf("required_checks.shell_path basename is not a recognized shell executable; set an explicit required_checks.shell kind (sh, bash, cmd, powershell, or pwsh) as fallback metadata") + } + } + return resolvedShell{Kind: kind, Bin: configuredPath}, nil + } + switch configured { + case "", "auto": + if goos == "windows" { + if bash, ok := discoverWindowsBash(resolver); ok { + return resolvedShell{Kind: "bash", Bin: bash}, nil + } + return resolvedShell{Kind: "cmd", Bin: "cmd.exe"}, nil + } + return resolvedShell{Kind: "sh", Bin: "/bin/sh"}, nil + case "sh", "bash", "cmd", "powershell", "pwsh": + shell := shellForKind(configured) + if goos == "windows" && configured == "bash" { + bash, ok := discoverWindowsBash(resolver) + if !ok { + return resolvedShell{}, fmt.Errorf( + "required_checks.shell is \"bash\" on Windows but no standard Git for Windows Bash install was discoverable; " + + "PATH-discovered bash entries (WSL launcher at C:\\Windows\\System32\\bash.exe, WindowsApps shims, MSYS2, Cygwin, Scoop, or Chocolatey-managed Bashes) are not auto-selected to avoid silently switching the required-check shell; " + + "set required_checks.shell_path to the exact bash executable path you want Galley to launch") + } + shell.Bin = bash + } + return shell, nil + default: + return resolvedShell{}, fmt.Errorf("unsupported required check shell %q", configured) + } +} + +// standardWindowsGitBashPaths lists the canonical Git for Windows install +// layouts that Galley should prefer for profile command shell auto-discovery. +var standardWindowsGitBashPaths = []string{ + `C:\Program Files\Git\bin\bash.exe`, + `C:\Program Files\Git\usr\bin\bash.exe`, + `C:\Program Files (x86)\Git\bin\bash.exe`, + `C:\Program Files (x86)\Git\usr\bin\bash.exe`, +} + +func discoverWindowsBash(resolver Resolver) (string, bool) { + for _, candidate := range standardWindowsGitBashPaths { + if _, err := resolver.StatFile(candidate); err == nil { + return candidate, true + } + } + for _, name := range []string{"git.exe", "git"} { + gitPath, err := resolver.LookPath(name) + if err != nil { + continue + } + if bashPath := gitBashFromGitPath(gitPath); bashPath != "" { + if _, err := resolver.StatFile(bashPath); err == nil { + return bashPath, true + } + } + } + for _, name := range []string{"bash.exe", "bash"} { + path, err := resolver.LookPath(name) + if err != nil { + continue + } + if !isStandardGitForWindowsBashPath(path) { + continue + } + return path, true + } + return "", false +} + +func isStandardGitForWindowsBashPath(path string) bool { + normalized := strings.ToLower(strings.ReplaceAll(path, `\`, `/`)) + for _, std := range standardWindowsGitBashPaths { + if normalized == strings.ToLower(strings.ReplaceAll(std, `\`, `/`)) { + return true + } + } + return false +} + +func gitBashFromGitPath(gitPath string) string { + normalized := strings.ReplaceAll(gitPath, `\`, `/`) + lower := strings.ToLower(normalized) + for _, suffix := range []string{"/cmd/git.exe", "/cmd/git"} { + if strings.HasSuffix(lower, suffix) { + return normalized[:len(normalized)-len(suffix)] + "/bin/bash.exe" + } + } + return "" +} + +func shellForKind(kind string) resolvedShell { + switch kind { + case "bash": + return resolvedShell{Kind: kind, Bin: "bash"} + case "cmd": + return resolvedShell{Kind: kind, Bin: "cmd.exe"} + case "powershell": + return resolvedShell{Kind: kind, Bin: "powershell.exe"} + case "pwsh": + return resolvedShell{Kind: kind, Bin: "pwsh"} + default: + return resolvedShell{Kind: "sh", Bin: "/bin/sh"} + } +} + +func shellArgv(shell resolvedShell, command string) []string { + switch shell.Kind { + case "bash": + return []string{shell.Bin, "-c", command} + case "cmd": + return []string{shell.Bin, "/C", command} + case "powershell": + return []string{shell.Bin, "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command} + case "pwsh": + return []string{shell.Bin, "-NoProfile", "-Command", command} + default: + return []string{shell.Bin, "-c", command} + } +} + +func shellScriptArgv(shell resolvedShell, scriptPath string) []string { + switch shell.Kind { + case "bash": + return []string{shell.Bin, scriptPath} + case "sh": + return []string{shell.Bin, scriptPath} + case "powershell": + return []string{shell.Bin, "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", scriptPath} + case "pwsh": + return []string{shell.Bin, "-NoProfile", "-File", scriptPath} + default: + return []string{shell.Bin, "/C", scriptPath} + } +} diff --git a/internal/profilecmd/shell_test.go b/internal/profilecmd/shell_test.go new file mode 100644 index 0000000..1f9d71f --- /dev/null +++ b/internal/profilecmd/shell_test.go @@ -0,0 +1,90 @@ +package profilecmd + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/shinpr/galley/internal/profile" +) + +func TestShellArgvForOSHonorsExplicitShellPathWithoutDiscovery(t *testing.T) { + lookCalls := 0 + statCalls := 0 + argv, cleanup, shell, err := ShellArgvForOSWithResolver("linux", "echo ok", "", profile.RequiredCheckEnvironment{ + ShellPath: "/opt/galley/bin/bash", + }, Resolver{ + LookPath: func(string) (string, error) { + lookCalls++ + return "", os.ErrNotExist + }, + StatFile: func(string) (os.FileInfo, error) { + statCalls++ + return nil, os.ErrNotExist + }, + }) + if cleanup != nil { + defer cleanup() + } + if err != nil { + t.Fatalf("shell argv: %v", err) + } + if shell != "bash" { + t.Fatalf("shell got %q, want bash", shell) + } + if got := argv[0]; got != "/opt/galley/bin/bash" { + t.Fatalf("argv[0] got %q, want explicit shell_path", got) + } + if lookCalls != 0 || statCalls != 0 { + t.Fatalf("explicit shell_path must skip discovery, look=%d stat=%d", lookCalls, statCalls) + } +} + +func TestShellArgvForOSWindowsBashRejectsNonStandardPathDiscovery(t *testing.T) { + _, cleanup, _, err := ShellArgvForOSWithResolver("windows", "echo ok", t.TempDir(), profile.RequiredCheckEnvironment{ + Shell: "bash", + }, Resolver{ + LookPath: func(file string) (string, error) { + if file == "bash.exe" || file == "bash" { + return `C:\Windows\System32\bash.exe`, nil + } + return "", os.ErrNotExist + }, + StatFile: func(string) (os.FileInfo, error) { + return nil, os.ErrNotExist + }, + }) + if cleanup != nil { + defer cleanup() + } + if err == nil { + t.Fatal("expected non-standard Windows bash discovery to fail") + } + if !strings.Contains(err.Error(), "required_checks.shell_path") { + t.Fatalf("error must name shell_path override, got %q", err.Error()) + } +} + +func TestShellArgvForOSWindowsPowershellUsesProfileShellShape(t *testing.T) { + scratch := t.TempDir() + argv, cleanup, shell, err := ShellArgvForOS("windows", "Write-Output ok", scratch, profile.RequiredCheckEnvironment{ + Shell: "pwsh", + }) + if cleanup != nil { + defer cleanup() + } + if err != nil { + t.Fatalf("shell argv: %v", err) + } + if shell != "pwsh" { + t.Fatalf("shell got %q, want pwsh", shell) + } + if len(argv) < 4 || argv[0] != "pwsh" || argv[1] != "-NoProfile" || argv[len(argv)-2] != "-File" { + t.Fatalf("pwsh argv shape got %#v", argv) + } + scriptPath := argv[len(argv)-1] + if filepath.Ext(scriptPath) != ".ps1" { + t.Fatalf("pwsh script path got %q, want .ps1", scriptPath) + } +} diff --git a/internal/result/result.go b/internal/result/result.go index 9eb7170..68710f4 100644 --- a/internal/result/result.go +++ b/internal/result/result.go @@ -13,6 +13,7 @@ import ( "github.com/shinpr/galley/internal/jsonio" "github.com/shinpr/galley/internal/profile" + "github.com/shinpr/galley/internal/profilecmd" "github.com/shinpr/galley/internal/runner" "github.com/shinpr/galley/internal/task" ) @@ -218,247 +219,14 @@ func requiredCheckShellSettings(env *profile.Environment) (string, string) { return env.RequiredChecks.Shell, env.RequiredChecks.ShellPath } -// shellArgvForOS returns the argv that runVerification should hand to the -// shared runner, plus an optional cleanup function for materialized script -// files. On Windows, an unset shell resolves to a standard Git for Windows -// Bash install when one is discoverable and falls back to cmd.exe; PATH -// entries that point at the WSL launcher (System32\bash.exe) or a -// WindowsApps shim are not selected as Git Bash. An explicit configured -// shell kind paired with a configuredShellPath uses that executable path -// verbatim and skips discovery. func shellArgvForOS(goos, command, scratchDir, configuredShell, configuredShellPath string) ([]string, func(), string, error) { - shell, err := resolveShellForOS(goos, configuredShell, configuredShellPath) - if err != nil { - return nil, nil, "", err - } - if goos != "windows" { - return shellArgv(shell, command), nil, shell.Kind, nil - } - dir := scratchDir - cleanup := func() {} - if dir == "" { - tmp, err := os.MkdirTemp("", "galley-windows-check-*") - if err != nil { - return nil, nil, "", fmt.Errorf("create windows verification script dir: %w", err) - } - dir = tmp - cleanup = func() { _ = os.RemoveAll(tmp) } - } else if err := os.MkdirAll(dir, 0o700); err != nil { - return nil, nil, "", fmt.Errorf("create windows verification script dir %s: %w", dir, err) - } - ext := ".cmd" - body := []byte("@echo off\r\n" + command + "\r\n") - if shell.Kind == "bash" { - ext = ".sh" - body = []byte("#!/usr/bin/env bash\nset -e\n" + command + "\n") - } else if shell.Kind == "sh" { - ext = ".sh" - body = []byte("set -e\n" + command + "\n") - } else if shell.Kind == "powershell" || shell.Kind == "pwsh" { - ext = ".ps1" - body = []byte("$ErrorActionPreference = 'Stop'\r\n" + command + "\r\n") - } - scriptPath := filepath.Join(dir, "galley-verification"+ext) - if err := os.WriteFile(scriptPath, body, 0o600); err != nil { - cleanup() - return nil, nil, "", fmt.Errorf("write windows verification script %s: %w", scriptPath, err) - } - return shellScriptArgv(shell, scriptPath), cleanup, shell.Kind, nil -} - -type resolvedShell struct { - Kind string - Bin string -} - -func resolveShellForOS(goos, configured, configuredPath string) (resolvedShell, error) { - // required_checks.shell_path is the more specific executable selection. - // When set, infer the shell invocation style from the executable basename - // so the configured `required_checks.shell` cannot drive an incompatible - // argv shape against the chosen executable. When the basename is not a - // recognized shell, fall back to a concrete configured shell kind as - // fallback metadata. Profile validation rejects the no-fallback case at - // load time; the resolver guards it again so an unexpectedly-loaded - // profile surfaces a clear error. - if configuredPath != "" { - kind := profile.InferRequiredCheckShellKind(configuredPath) - if kind == "" { - switch configured { - case "sh", "bash", "cmd", "powershell", "pwsh": - kind = configured - default: - return resolvedShell{}, fmt.Errorf("required_checks.shell_path basename is not a recognized shell executable; set an explicit required_checks.shell kind (sh, bash, cmd, powershell, or pwsh) as fallback metadata") - } - } - return resolvedShell{Kind: kind, Bin: configuredPath}, nil - } - switch configured { - case "", "auto": - if goos == "windows" { - if bash, ok := discoverWindowsBash(); ok { - return resolvedShell{Kind: "bash", Bin: bash}, nil - } - return resolvedShell{Kind: "cmd", Bin: "cmd.exe"}, nil - } - return resolvedShell{Kind: "sh", Bin: "/bin/sh"}, nil - case "sh", "bash", "cmd", "powershell", "pwsh": - shell := shellForKind(configured) - // AC4: When Windows required checks ask for Bash through - // `required_checks.shell: bash` and no `shell_path` is set, prefer - // standard Git for Windows Bash discovery over PATH-discovered WSL - // launchers, WindowsApps shims, or other non-standard Bash entries. - // When no standard Git for Windows Bash is discoverable, the resolver - // must NOT fall back to a bare `bash` executable: bare `bash` lets - // PATH lookup at exec time silently pick up the very entries that - // discoverWindowsBash just rejected (WSL launcher, WindowsApps shim, - // MSYS2, Cygwin, Scoop, Chocolatey-managed Bashes), defeating the - // rejection. Return a clear resolver error that names - // `required_checks.shell_path` as the explicit override path instead, - // so the operator opts in to a specific non-standard Bash rather than - // letting Galley silently launch one. - if goos == "windows" && configured == "bash" { - bash, ok := discoverWindowsBash() - if !ok { - return resolvedShell{}, fmt.Errorf( - "required_checks.shell is \"bash\" on Windows but no standard Git for Windows Bash install was discoverable; " + - "PATH-discovered bash entries (WSL launcher at C:\\Windows\\System32\\bash.exe, WindowsApps shims, MSYS2, Cygwin, Scoop, or Chocolatey-managed Bashes) are not auto-selected to avoid silently switching the required-check shell; " + - "set required_checks.shell_path to the exact bash executable path you want Galley to launch") - } - shell.Bin = bash - } - return shell, nil - default: - return resolvedShell{}, fmt.Errorf("unsupported required check shell %q", configured) - } -} - -// standardWindowsGitBashPaths lists the canonical Git for Windows install -// layouts that Galley should prefer for required-check shell auto-discovery. -// These are written with Windows backslashes to match what statFile receives -// from the OS path resolver on real Windows hosts. -var standardWindowsGitBashPaths = []string{ - `C:\Program Files\Git\bin\bash.exe`, - `C:\Program Files\Git\usr\bin\bash.exe`, - `C:\Program Files (x86)\Git\bin\bash.exe`, - `C:\Program Files (x86)\Git\usr\bin\bash.exe`, -} - -func discoverWindowsBash() (string, bool) { - // 1) Prefer canonical Git for Windows install locations regardless of - // PATH ordering, so a WSL launcher or WindowsApps shim that happens - // to be earlier on PATH does not shadow a real Git Bash install. - for _, candidate := range standardWindowsGitBashPaths { - if _, err := statFile(candidate); err == nil { - return candidate, true - } - } - // 2) Infer Git Bash from a discoverable git.exe (cmd/git.exe -> bin/bash.exe). - // This covers portable Git layouts that match the cmd/git -> bin/bash - // convention without sitting under "Program Files". - for _, name := range []string{"git.exe", "git"} { - gitPath, err := lookPath(name) - if err != nil { - continue - } - if bashPath := gitBashFromGitPath(gitPath); bashPath != "" { - if _, err := statFile(bashPath); err == nil { - return bashPath, true - } - } - } - // 3) As a last resort, accept a PATH-discovered bash.exe only when it - // resolves to one of the canonical Git for Windows install paths. - // This preserves Git for Windows installs that statFile in step 1 - // cannot observe (PATH-only or test environments) while refusing - // arbitrary PATH entries — including the WSL launcher, WindowsApps - // shims, MSYS2, Cygwin, Scoop, and Chocolatey-managed Bashes. Those - // non-standard Bashes must be opted into via required_checks.shell - // plus required_checks.shell_path, never auto-selected from PATH. - for _, name := range []string{"bash.exe", "bash"} { - path, err := lookPath(name) - if err != nil { - continue - } - if !isStandardGitForWindowsBashPath(path) { - continue - } - return path, true - } - return "", false -} - -// isStandardGitForWindowsBashPath returns true when the supplied path -// matches one of the canonical Git for Windows install locations in -// standardWindowsGitBashPaths. Matching is case-insensitive and tolerates -// either backslash or forward-slash separators so PATH-discovered entries -// using the alternate separator form still resolve. Any non-matching path -// — including the WSL launcher, WindowsApps shim, MSYS2, Cygwin, Scoop, -// or Chocolatey-managed Bashes — is rejected and the auto resolver falls -// back to cmd.exe so required checks do not silently switch shells. -func isStandardGitForWindowsBashPath(path string) bool { - normalized := strings.ToLower(strings.ReplaceAll(path, `\`, `/`)) - for _, std := range standardWindowsGitBashPaths { - if normalized == strings.ToLower(strings.ReplaceAll(std, `\`, `/`)) { - return true - } - } - return false -} - -func gitBashFromGitPath(gitPath string) string { - normalized := strings.ReplaceAll(gitPath, `\`, `/`) - lower := strings.ToLower(normalized) - for _, suffix := range []string{"/cmd/git.exe", "/cmd/git"} { - if strings.HasSuffix(lower, suffix) { - return normalized[:len(normalized)-len(suffix)] + "/bin/bash.exe" - } - } - return "" -} - -func shellForKind(kind string) resolvedShell { - switch kind { - case "bash": - return resolvedShell{Kind: kind, Bin: "bash"} - case "cmd": - return resolvedShell{Kind: kind, Bin: "cmd.exe"} - case "powershell": - return resolvedShell{Kind: kind, Bin: "powershell.exe"} - case "pwsh": - return resolvedShell{Kind: kind, Bin: "pwsh"} - default: - return resolvedShell{Kind: "sh", Bin: "/bin/sh"} - } -} - -func shellArgv(shell resolvedShell, command string) []string { - switch shell.Kind { - case "bash": - return []string{shell.Bin, "-c", command} - case "cmd": - return []string{shell.Bin, "/C", command} - case "powershell": - return []string{shell.Bin, "-NoProfile", "-ExecutionPolicy", "Bypass", "-Command", command} - case "pwsh": - return []string{shell.Bin, "-NoProfile", "-Command", command} - default: - return []string{shell.Bin, "-c", command} - } -} - -func shellScriptArgv(shell resolvedShell, scriptPath string) []string { - switch shell.Kind { - case "bash": - return []string{shell.Bin, scriptPath} - case "sh": - return []string{shell.Bin, scriptPath} - case "powershell": - return []string{shell.Bin, "-NoProfile", "-ExecutionPolicy", "Bypass", "-File", scriptPath} - case "pwsh": - return []string{shell.Bin, "-NoProfile", "-File", scriptPath} - default: - return []string{shell.Bin, "/C", scriptPath} - } + return profilecmd.ShellArgvForOSWithResolver(goos, command, scratchDir, profile.RequiredCheckEnvironment{ + Shell: configuredShell, + ShellPath: configuredShellPath, + }, profilecmd.Resolver{ + LookPath: lookPath, + StatFile: statFile, + }) } func (r verificationRun) status() string { diff --git a/internal/runner/claude_guard_plugin/scripts/require-final-json.py b/internal/runner/claude_guard_plugin/scripts/require-final-json.py index 83cda61..c579886 100755 --- a/internal/runner/claude_guard_plugin/scripts/require-final-json.py +++ b/internal/runner/claude_guard_plugin/scripts/require-final-json.py @@ -61,6 +61,28 @@ "no_skeletons": [] }""" +SETUP_EXECUTOR_TEMPLATE = """{ + "status": "ready", + "commands": [ + { + "run": "setup command that was run", + "why": "Why this command is part of setup", + "source": "environment_commands", + "exit_code": 0, + "stdout_excerpt": "Short relevant output excerpt", + "stderr_excerpt": "" + } + ], + "successful_commands": [ + { + "run": "setup command that should be persisted", + "why": "Why this command makes the worktree ready" + } + ], + "inspected_files": ["package.json"], + "readiness_evidence": "The setup command and a representative required check passed." +}""" + def block(reason): print(json.dumps({ @@ -290,6 +312,40 @@ def validate_creator_manifest(manifest): raise ValueError(f"no_skeletons[{index}].{field} must be a non-empty string") +def validate_setup_executor_result(result): + require_object(result, "setup_result") + if result.get("status") not in {"ready", "failed"}: + raise ValueError("status must be ready or failed") + if "commands" not in result: + raise ValueError("commands is required") + require_array(result["commands"], "commands") + if len(result["commands"]) > 50: + raise ValueError("commands must contain at most 50 entries") + for index, command in enumerate(result["commands"]): + require_object(command, f"commands[{index}]") + for field in ["run", "source", "exit_code"]: + if field not in command: + raise ValueError(f"commands[{index}].{field} is required") + if not isinstance(command["run"], str) or not command["run"].strip(): + raise ValueError(f"commands[{index}].run must be a non-empty string") + if len(command["run"]) > 4096: + raise ValueError(f"commands[{index}].run is too long") + if command.get("source") not in {"environment_setup", "environment_commands", "discovered", "readiness_check"}: + raise ValueError(f"commands[{index}].source is invalid") + if not isinstance(command["exit_code"], int): + raise ValueError(f"commands[{index}].exit_code must be an integer") + if "successful_commands" in result: + require_array(result["successful_commands"], "successful_commands") + if len(result["successful_commands"]) > 50: + raise ValueError("successful_commands must contain at most 50 entries") + for index, command in enumerate(result["successful_commands"]): + require_object(command, f"successful_commands[{index}]") + if not isinstance(command.get("run"), str) or not command["run"].strip(): + raise ValueError(f"successful_commands[{index}].run must be a non-empty string") + if len(command["run"]) > 4096: + raise ValueError(f"successful_commands[{index}].run is too long") + + def main(): try: hook_input = json.load(sys.stdin) @@ -316,6 +372,8 @@ def main(): validate_supervisor_verdict(result) elif mode == "acceptance_skeleton_creator": validate_creator_manifest(result) + elif mode == "setup_executor": + validate_setup_executor_result(result) else: validate_result(result) except (json.JSONDecodeError, ValueError, TypeError) as err: @@ -333,6 +391,13 @@ def main(): "Respond again with only the JSON object. Use this shape and fill it with the actual generated skeleton files:\n\n" f"{CREATOR_TEMPLATE}" ) + elif mode == "setup_executor": + block( + "The final assistant response must be exactly one JSON object matching the Galley setup executor result contract.\n\n" + f"Validation error: {err}\n\n" + "Respond again with only the JSON object. Use this shape and fill it with the actual setup evidence:\n\n" + f"{SETUP_EXECUTOR_TEMPLATE}" + ) else: block( "The final assistant response must be exactly one JSON object matching the Galley executor result contract.\n\n" diff --git a/plugins/galley/skills/galley/references/environment.schema.json b/plugins/galley/skills/galley/references/environment.schema.json index 4defe63..b6e9119 100644 --- a/plugins/galley/skills/galley/references/environment.schema.json +++ b/plugins/galley/skills/galley/references/environment.schema.json @@ -125,10 +125,12 @@ "additionalProperties": false, "properties": { "run": { + "maxLength": 4096, "minLength": 1, "type": "string" }, "why": { + "maxLength": 1024, "minLength": 1, "type": "string" } diff --git a/prompts/setup-executor-codex.md b/prompts/setup-executor-codex.md index 7fd09cb..d1dcc3f 100644 --- a/prompts/setup-executor-codex.md +++ b/prompts/setup-executor-codex.md @@ -29,6 +29,7 @@ You may discover and return a different successful plan only when the supplied c - Run setup commands with the Codex shell tool from inside the worktree. Capture stdout/stderr; record exit codes for every attempt. - Do not modify source files. Cache and build directories the project's setup expects are allowed. - Stay inside the worktree. Never touch `.git`. Never run destructive commands. Treat `.env` files as never readable. +- If setup requires credentials, private registry access, or external services that are unavailable, set `status: "failed"` with concrete repair guidance instead of reading secrets or guessing. # Workflow diff --git a/schemas/setup-result.schema.json b/schemas/setup-result.schema.json index d0d9413..3320ca4 100644 --- a/schemas/setup-result.schema.json +++ b/schemas/setup-result.schema.json @@ -12,53 +12,59 @@ }, "commands": { "type": "array", + "maxItems": 50, "description": "Ordered command attempts the setup executor made. Each entry is one command that was actually executed, in order, with the captured outcome. The successful_commands subset is the persistable setup plan when status is ready.", "items": { "type": "object", "additionalProperties": false, "required": ["run", "source", "exit_code"], "properties": { - "run": { "type": "string", "minLength": 1 }, - "why": { "type": "string" }, + "run": { "type": "string", "minLength": 1, "maxLength": 4096 }, + "why": { "type": "string", "maxLength": 1024 }, "source": { "type": "string", "enum": ["environment_setup", "environment_commands", "discovered", "readiness_check"], "description": "Where the executor took this command from. `readiness_check` is reserved for the daemon-authored readiness verification it runs after an authored environment.setup plan succeeds; setup executors should not author readiness_check entries." }, "exit_code": { "type": "integer" }, - "stdout_excerpt": { "type": "string" }, - "stderr_excerpt": { "type": "string" } + "stdout_excerpt": { "type": "string", "maxLength": 400 }, + "stderr_excerpt": { "type": "string", "maxLength": 400 } } } }, "successful_commands": { "type": "array", + "maxItems": 50, "description": "The ordered subset of commands that made the worktree ready. When status is ready this is the setup plan that should be persisted to environment.yaml when the profile lacked a setup field or the learned plan differs.", "items": { "type": "object", "additionalProperties": false, "required": ["run"], "properties": { - "run": { "type": "string", "minLength": 1 }, - "why": { "type": "string" } + "run": { "type": "string", "minLength": 1, "maxLength": 4096 }, + "why": { "type": "string", "maxLength": 1024 } } } }, "inspected_files": { "type": "array", + "maxItems": 100, "description": "Files the setup executor inspected to plan the setup, such as package manifests, lockfiles, and setup docs.", - "items": { "type": "string", "minLength": 1 } + "items": { "type": "string", "minLength": 1, "maxLength": 512 } }, "readiness_evidence": { "type": "string", + "maxLength": 2048, "description": "One or two sentences explaining why the executor concluded the worktree is ready, citing the verification it ran." }, "repair_guidance": { "type": "string", + "maxLength": 2048, "description": "When status is failed, concrete next steps the operator can take to repair environment.setup." }, "error": { "type": "string", + "maxLength": 2048, "description": "When status is failed, the terse failure summary." }, "provider": { From 2daf3b77df1e8c645d7a8a06761e030374790066 Mon Sep 17 00:00:00 2001 From: Shinsuke Kagawa Date: Mon, 25 May 2026 21:28:54 +0900 Subject: [PATCH 3/6] Clarify setup executor output contract --- prompts/setup-executor-codex.md | 67 ++++++++++++++++++++++++++++++++- prompts/setup-executor.md | 67 ++++++++++++++++++++++++++++++++- 2 files changed, 131 insertions(+), 3 deletions(-) diff --git a/prompts/setup-executor-codex.md b/prompts/setup-executor-codex.md index d1dcc3f..3c64e7f 100644 --- a/prompts/setup-executor-codex.md +++ b/prompts/setup-executor-codex.md @@ -57,12 +57,77 @@ Run at least one quality required check (or its closest available equivalent). C ## Step 5. Return Result -Return exactly one JSON object that matches the configured schema. `successful_commands` must be the ordered minimal plan that would make a fresh worktree ready if rerun. Include `why` strings so persisted environment.yaml stays human-readable. +Run the Self Quality Gate below, then return exactly one JSON object that matches the Output Contract. If you cannot make the worktree ready, set `status: "failed"`, write a terse `error`, fill `repair_guidance` with concrete next steps, and still return the attempted `commands[]` for operator diagnosis. +# Self Quality Gate + +Before returning the final JSON, verify: + +- Every `commands[]` entry has `run`, `source`, and `exit_code`. +- `status: "ready"` has non-empty `successful_commands`, `readiness_evidence`, and top-level `source`. +- Every `successful_commands[].run` also appears in `commands[]`. +- Setup executor output does not use `source: "readiness_check"`; that source is reserved for the daemon. +- `status: "failed"` has `error` and `repair_guidance`. + # Setup-Specific Rules - Setup readiness EXCLUDES acceptance skeleton obligations. Do not fail because a task-specific test skeleton has not been implemented yet. - Keep `commands[].stdout_excerpt` and `stderr_excerpt` short (final 200-400 characters at most). - The setup executor result JSON is the only authoritative output. Print it as the final assistant message so Codex captures it through `--output-last-message`. + +# Output Contract + +Return one JSON object as the entire response body. Use no Markdown fences, commentary, logs, or surrounding text. + +Use this shape for a ready worktree: + +```json +{ + "status": "ready", + "source": "environment_commands", + "commands": [ + { + "run": "npm ci", + "why": "Install locked project dependencies.", + "source": "environment_commands", + "exit_code": 0, + "stdout_excerpt": "added packages", + "stderr_excerpt": "" + } + ], + "successful_commands": [ + { + "run": "npm ci", + "why": "Install locked project dependencies." + } + ], + "inspected_files": ["package.json", "package-lock.json"], + "readiness_evidence": "`npm ci` exited 0 and the selected quality required check passed." +} +``` + +Set top-level `source` to `environment_setup` when the authored setup plan made the worktree ready unchanged, `environment_commands` when the successful plan reuses environment commands, or `discovered` when the successful plan is composed from repository signals or conventions. After an authored plan fails, use the `source` of the replacement plan that made the worktree ready. + +Use this shape when setup cannot make the worktree ready: + +```json +{ + "status": "failed", + "source": "discovered", + "commands": [ + { + "run": "npm ci", + "why": "Install locked project dependencies.", + "source": "environment_commands", + "exit_code": 1, + "stdout_excerpt": "", + "stderr_excerpt": "authentication required" + } + ], + "inspected_files": ["package.json", "package-lock.json"], + "error": "Dependency installation requires unavailable private registry credentials.", + "repair_guidance": "Configure the registry credentials for this repository or author environment.setup with the approved internal install command." +} +``` diff --git a/prompts/setup-executor.md b/prompts/setup-executor.md index c86519c..3d66ab1 100644 --- a/prompts/setup-executor.md +++ b/prompts/setup-executor.md @@ -31,7 +31,7 @@ When `environment.setup` runs cleanly you must return it unchanged as `successfu - Search and read tools: inspect manifests (package.json, go.mod, pyproject.toml, Cargo.toml), lockfiles, Makefile, scripts/, .tool-versions, and README sections that mention setup. - Bash tool: run setup commands inside the worktree. Capture stdout/stderr; record the exit code for every attempt in `commands[]`. -- Edit/write tools: avoid writing source files. You may create cache or build directories that the project's setup expects. +- Edit/write tools: only write into cache or build directories the project's setup expects. Source files stay unchanged. # Workflow @@ -61,13 +61,76 @@ Run at least one quality required check (or its closest available equivalent) to ## Step 5. Return Result [BLOCKING] -Return exactly one JSON object matching the configured schema. `successful_commands` must be the ordered minimal plan that, if rerun on a fresh worktree, would make it ready. Include `why` strings so the persisted environment.yaml stays human-readable. +Run the Self Quality Gate below, then return exactly one JSON object matching the Result Contract. The Stop hook validates that the final assistant response is parseable JSON with the required fields and enum values, and will ask for a corrected response when the JSON is missing or invalid. If you cannot make the worktree ready, set `status: "failed"`, fill `error` with the terse failure, fill `repair_guidance` with concrete next steps, and still return the attempted `commands[]` so the operator can diagnose. +# Self Quality Gate + +Before returning the final JSON, verify: + +- Every `commands[]` entry records the command `run`, its `source`, and its `exit_code`. +- `status: "ready"` includes non-empty `successful_commands`, `readiness_evidence`, and a top-level `source` for the successful plan. +- `successful_commands[].run` values are commands you actually ran and recorded in `commands[]`. +- Setup executor output never uses `source: "readiness_check"`; that source is reserved for the daemon's own authored-plan verification. +- `status: "failed"` includes both `error` and `repair_guidance`. + # Setup-Specific Rules - Setup readiness excludes acceptance skeleton obligations. Do NOT fail because a task-specific skeleton test has not been implemented yet. - Treat secrets as never readable from .env files. If a required dependency needs credentials that are not present, set `status: "failed"` with repair guidance. - Never run destructive commands. Never modify `.git`. Stay inside the worktree. - Keep `commands[].stdout_excerpt` and `stderr_excerpt` short (the final 200-400 characters at most). + +# Result Contract + +Your final assistant response is the setup executor result. Return exactly one JSON object as the entire response body. Use this shape for a ready worktree: + +```json +{ + "status": "ready", + "source": "environment_commands", + "commands": [ + { + "run": "npm ci", + "why": "Install locked project dependencies.", + "source": "environment_commands", + "exit_code": 0, + "stdout_excerpt": "added packages", + "stderr_excerpt": "" + } + ], + "successful_commands": [ + { + "run": "npm ci", + "why": "Install locked project dependencies." + } + ], + "inspected_files": ["package.json", "package-lock.json"], + "readiness_evidence": "`npm ci` exited 0 and the selected quality required check passed." +} +``` + +Use top-level `source` for the successful plan: `environment_setup` when the authored setup plan made the worktree ready unchanged, `environment_commands` when the successful plan reuses environment commands, and `discovered` when the successful plan is composed from repository signals or conventions. After an authored plan fails, set top-level `source` from the replacement plan that made the worktree ready. + +Use this shape when setup cannot make the worktree ready: + +```json +{ + "status": "failed", + "source": "discovered", + "commands": [ + { + "run": "npm ci", + "why": "Install locked project dependencies.", + "source": "environment_commands", + "exit_code": 1, + "stdout_excerpt": "", + "stderr_excerpt": "authentication required" + } + ], + "inspected_files": ["package.json", "package-lock.json"], + "error": "Dependency installation requires unavailable private registry credentials.", + "repair_guidance": "Configure the registry credentials for this repository or author environment.setup with the approved internal install command." +} +``` From de4af5e931d18d7e290fb8b915cf5ac8a8afb72f Mon Sep 17 00:00:00 2001 From: Shinsuke Kagawa Date: Mon, 25 May 2026 21:40:17 +0900 Subject: [PATCH 4/6] Update Galley plugin setup troubleshooting --- .claude-plugin/marketplace.json | 2 +- CHANGELOG.md | 10 ++++--- plugins/galley/.claude-plugin/plugin.json | 2 +- plugins/galley/.codex-plugin/plugin.json | 2 +- .../galley/references/troubleshooting.md | 29 ++++++++++++++----- 5 files changed, 30 insertions(+), 15 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index c2345de..f8bb681 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,7 +9,7 @@ "name": "galley", "source": "./plugins/galley", "description": "Create, validate, queue, set up, and troubleshoot Galley AFK task workflows.", - "version": "0.1.12", + "version": "0.1.13", "author": { "name": "Shinsuke Kagawa", "url": "https://github.com/shinpr" diff --git a/CHANGELOG.md b/CHANGELOG.md index 34ef924..647acb6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,12 @@ This project follows semantic versioning. ### Added -- Setup executor preflight phase. After the worktree and input files are prepared and before the acceptance skeleton preflight and implementation executor, Galley runs a setup phase that either executes the operator-authored `environment.setup.commands` or, when absent, dispatches a setup executor (Claude or Codex per task `executor.cli`) to discover and validate a working setup plan. On success the learned plan is atomically written back to `environment.yaml` so subsequent tasks reuse it without rediscovery. -- `environment.yaml` now accepts a top-level `setup` block whose `commands[]` list is ordered shell commands with optional `why` annotations. The bundled environment schema requires `commands` to be non-empty and matches `ValidateEnvironment`. -- New run evidence artifacts: `runs//setup_result.json` records attempted commands, command source (`environment_setup`, `environment_commands`, `readiness_check`, or `discovered`), inspected files, readiness evidence, and repair guidance; `runs//environment_update.json` records the before/after diff and rewrite reason when Galley persists a learned plan. -- Setup failure classification. A failed setup phase records `phase: setup`, `kind: setup_failed` on the task and surfaces attempted commands, inspected files, stdout/stderr excerpts, and repair guidance in `setup_result.json` so operators can repair `environment.setup` and requeue. +- Setup executor preflight phase and `environment.yaml` `setup.commands[]`. Galley now prepares fresh task worktrees before acceptance skeleton creation and implementation by running authored setup commands or dispatching a setup executor to learn a reusable setup plan, which is persisted back to `environment.yaml` on success. +- Setup run evidence and failure routing. Setup now writes `setup_result.json` and, when a learned plan is persisted, `environment_update.json`; setup failures are classified as `phase: setup`, `kind: setup_failed` with repair guidance for `environment.setup`. + +### Changed + +- Packaged Claude and Codex Galley plugins are now versioned as `0.1.13`: setup executor prompts now include explicit result JSON contracts and troubleshooting guidance routes `setup_failed` diagnosis through setup run evidence. ## v0.6.2 - 2026-05-25 diff --git a/plugins/galley/.claude-plugin/plugin.json b/plugins/galley/.claude-plugin/plugin.json index ee1b38c..96b8c00 100644 --- a/plugins/galley/.claude-plugin/plugin.json +++ b/plugins/galley/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "galley", "description": "Create, validate, queue, set up, and troubleshoot Galley AFK task workflows.", - "version": "0.1.12", + "version": "0.1.13", "author": { "name": "Shinsuke Kagawa", "url": "https://github.com/shinpr" diff --git a/plugins/galley/.codex-plugin/plugin.json b/plugins/galley/.codex-plugin/plugin.json index e960007..d9723eb 100644 --- a/plugins/galley/.codex-plugin/plugin.json +++ b/plugins/galley/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "galley", - "version": "0.1.12", + "version": "0.1.13", "description": "Create, validate, queue, set up, and troubleshoot Galley AFK task workflows.", "author": { "name": "Shinsuke Kagawa", diff --git a/plugins/galley/skills/galley/references/troubleshooting.md b/plugins/galley/skills/galley/references/troubleshooting.md index 7288c11..b8fcf17 100644 --- a/plugins/galley/skills/galley/references/troubleshooting.md +++ b/plugins/galley/skills/galley/references/troubleshooting.md @@ -39,6 +39,8 @@ Inspect the latest attempt directory: | `command_plan.json` | Claude executor invocation plan | | `codex_supervisor_request.json` | Evidence sent to Codex supervisor | | `claude_supervisor_request.json` | Evidence sent to Claude supervisor | +| `setup_result.json` (run directory, not `attempt-N/`) | Setup phase result: attempted commands, source, readiness evidence, and repair guidance | +| `environment_update.json` (run directory, when present) | Audit trail for a learned setup plan persisted back to `environment.yaml` | Read the files that exist. Focus on the newest attempt first. @@ -53,6 +55,7 @@ Read the files that exist. Focus on the newest attempt first. | `accepted` with quality concerns | pass policy treats concern as non-blocking | Add quality profile or PR comment requeue with specific blocker. | | no diff produced | task was investigation-only or executor stopped early | Check executor result and work order. | | PR comments ignored | polling disabled in `environment.yaml`, auth missing, or comment ID already processed | Check `pr.comments.enabled`, `gh auth status`, and `pr.processed_comment_ids`. | +| attempt kind is `setup_failed` | authored `environment.setup` failed, readiness verification failed, or setup discovery could not make the worktree ready | Read `setup_result.json` and the matching stderr log; fix `environment.yaml setup.commands` or remove the `setup` field. See Setup failures below. | ## Repair Actions @@ -86,23 +89,33 @@ For environment failure: ## Setup failures (phase=setup, kind=setup_failed) -A `setup_failed` attempt means Galley could not make the task worktree ready before the implementation executor ran. Inspect the run directory: +A `setup_failed` attempt means Galley could not make the task worktree ready before the implementation executor ran. Setup evidence lives in the run directory (`~/.galley/runs///`), not under `attempt-N/`. -- `runs//setup_result.json` — source-of-truth setup evidence. Read these fields first: - - `status`: `ready` or `failed`. - - `commands[]`: every command Galley attempted, with `source` (`environment_setup`, `environment_commands`, `readiness_check`, or `discovered`), `exit_code`, and stdout/stderr excerpts. The full output remains in the per-command `setup_authored.N.{stdout,stderr}.log`, `setup_readiness_check.{stdout,stderr}.log`, or `setup_executor.{stdout.jsonl,stderr.log}` files. - - `source`: which command list produced the final attempt (`environment_setup` for an authored plan, `discovered` for a learned plan). +- `setup_result.json` — source-of-truth setup evidence. Read these fields first: + - `status`: should be `failed` for a `setup_failed` attempt. A `ready` value here indicates daemon/executor disagreement; escalate with the run evidence. + - `commands[]`: every command Galley attempted, with `source` (`environment_setup`, `environment_commands`, `readiness_check`, or `discovered`), `exit_code`, and stdout/stderr excerpts. + - `source`: which command list produced the final plan. Use `environment_setup` for an authored plan, `environment_commands` for a plan reused from `environment.commands`, and `discovered` for a plan composed from repository signals or conventions. `readiness_check` is daemon-owned and appears only in `commands[].source`. - `inspected_files[]`: repository signals the setup executor read (manifests, lockfiles, setup docs). - `repair_guidance`: actionable guidance for fixing `environment.setup` or rerunning discovery. -- `runs//environment_update.json` — present only when Galley persisted a learned setup plan back to `environment.yaml`. Audit fields: `profile_path`, `before` (prior setup plan or null), `after` (new plan), `reason`, and `updated_at`. When a previously learned plan later fails, compare `before`/`after` to confirm which commands changed and decide whether to repair `environment.setup` by hand or to delete the `setup` field and let Galley rediscover. +- Matching full-output logs: + - `setup_authored.N.stdout.log` / `setup_authored.N.stderr.log` — authored `environment.setup` command at position `N`. + - `setup_readiness_check.stdout.log` / `setup_readiness_check.stderr.log` — daemon readiness verification after an authored plan. + - `setup_executor.stdout.jsonl` / `setup_executor.stderr.log` — setup executor invocation; stdout is the provider JSONL stream. +- `environment_update.json` — present only when Galley persisted or attempted to persist a learned setup plan back to `environment.yaml`. Audit fields: `profile_path`, `changed` (true when a setup plan was written; false when an update record exists but no profile change was published), `before` (prior setup plan or null), `after` (new plan), `diff`, `reason`, and `updated_at`. `diff` is a text representation of the setup command change and may be empty when no change was published. When a previously learned plan later fails, compare `before`/`after` to confirm which commands changed. Repair flow: 1. Read `setup_result.json` and identify the failing command (the last attempt with non-zero `exit_code`). -2. Inspect the matching `setup_authored.N.stderr.log` or `setup_executor.stderr.log`. -3. Edit `environment.yaml setup.commands` to fix the command, or remove the `setup` field entirely to let Galley discover and persist a new plan on the next run. +2. Inspect the matching log by `commands[].source`: `environment_setup` -> `setup_authored.N.stderr.log`; `readiness_check` -> `setup_readiness_check.stderr.log`; `environment_commands` or `discovered` -> `setup_executor.stderr.log` and `setup_executor.stdout.jsonl`. +3. Edit `environment.yaml setup.commands` when the failing command needs a small fix such as a typo, missing flag, or stale script name and the rest of the plan is still valid. Remove the `setup` field when the failure looks structural, such as a changed package manager, renamed setup flow, or toolchain migration, and rediscovery is cheaper than triage. 4. Requeue the task. +```bash +galley task requeue --reason "retry after repairing environment.setup" +``` + +For setup failures, populate `Evidence:` in the report with the failing `setup_result.json` command, the matching stderr log path, and `environment_update.json` before/after or `diff` when a learned plan changed. + ## Report Format ```markdown From 87fe9c75400800bb25c92a314f60ae5d378ad4d1 Mon Sep 17 00:00:00 2001 From: Shinsuke Kagawa Date: Mon, 25 May 2026 22:07:48 +0900 Subject: [PATCH 5/6] Fix Codex setup executor output schema --- internal/daemon/setup_executor_preflight.go | 2 +- internal/runner/codex.go | 122 +++++++++++++++---- internal/runner/codex_output_capture_test.go | 95 +++++++++++++++ 3 files changed, 192 insertions(+), 27 deletions(-) diff --git a/internal/daemon/setup_executor_preflight.go b/internal/daemon/setup_executor_preflight.go index 90a5861..57c8f6e 100644 --- a/internal/daemon/setup_executor_preflight.go +++ b/internal/daemon/setup_executor_preflight.go @@ -916,7 +916,7 @@ func buildCodexSetupExecutorCommandPlan(opts SetupExecutorPreflightOptions, payl codexOpts.WorkDir = opts.WorkDir codexOpts.Prompt = string(payload) codexOpts.SystemPrompt = prompts.SetupExecutorCodex() - codexOpts.JSONSchema = schemas.SetupResult + codexOpts.JSONSchema = runner.CodexCompatibleOutputSchema(schemas.SetupResult) codexOpts.AttemptDir = opts.RunDir commandPlan, err := runner.CodexCommandPlan(codexOpts) diff --git a/internal/runner/codex.go b/internal/runner/codex.go index 785d18a..da045f2 100644 --- a/internal/runner/codex.go +++ b/internal/runner/codex.go @@ -204,39 +204,109 @@ func withDefaultEmbeddedCodexOptions(opts CodexOptions) CodexOptions { // CodexExecutorResultSchema returns the executor result schema shape accepted // by `codex exec --output-schema`. Codex currently rejects JSON Schema // conditionals such as allOf/if/then/else in response_format schemas and -// requires every top-level property to be listed in required. The runner -// converts hard_stop into a required nullable field before invoking Codex. -// Galley still validates the parsed result with ClaudeResult.Validate(), which -// preserves the hard_stop semantic requirement after the model responds. +// requires every object property to be listed in required. The runner keeps +// optional semantics by making originally optional properties nullable before +// invoking Codex. Galley still validates the parsed result with +// ClaudeResult.Validate(), which preserves semantic requirements after the +// model responds. func CodexExecutorResultSchema() string { - var doc map[string]any - if err := json.Unmarshal([]byte(schemas.ClaudeResult), &doc); err != nil { - return schemas.ClaudeResult - } - delete(doc, "allOf") - props, _ := doc["properties"].(map[string]any) - if hardStop, _ := props["hard_stop"].(map[string]any); hardStop != nil { - hardStop["type"] = []any{"object", "null"} - } - if props != nil { - names := make([]string, 0, len(props)) - for name := range props { - names = append(names, name) - } - sort.Strings(names) - required := make([]any, 0, len(names)) - for _, name := range names { - required = append(required, name) - } - doc["required"] = required - } + return CodexCompatibleOutputSchema(schemas.ClaudeResult) +} + +// CodexCompatibleOutputSchema adapts Galley's persisted JSON schemas for the +// stricter response_format subset used by `codex exec --output-schema`. +func CodexCompatibleOutputSchema(schema string) string { + var doc any + if err := json.Unmarshal([]byte(schema), &doc); err != nil { + return schema + } + normalizeCodexOutputSchema(doc) out, err := json.MarshalIndent(doc, "", " ") if err != nil { - return schemas.ClaudeResult + return schema } return string(out) + "\n" } +func normalizeCodexOutputSchema(node any) { + switch v := node.(type) { + case map[string]any: + delete(v, "allOf") + props, _ := v["properties"].(map[string]any) + originalRequired := requiredNameSet(v["required"]) + if props != nil { + names := make([]string, 0, len(props)) + for name := range props { + names = append(names, name) + } + sort.Strings(names) + required := make([]any, 0, len(names)) + for _, name := range names { + required = append(required, name) + prop := props[name] + if !originalRequired[name] { + allowNullSchema(prop) + } + normalizeCodexOutputSchema(prop) + } + v["required"] = required + } + if items, ok := v["items"]; ok { + normalizeCodexOutputSchema(items) + } + case []any: + for _, item := range v { + normalizeCodexOutputSchema(item) + } + } +} + +func requiredNameSet(raw any) map[string]bool { + out := map[string]bool{} + items, ok := raw.([]any) + if !ok { + return out + } + for _, item := range items { + if name, ok := item.(string); ok { + out[name] = true + } + } + return out +} + +func allowNullSchema(node any) { + m, ok := node.(map[string]any) + if !ok { + return + } + if enum, ok := m["enum"].([]any); ok && !containsNull(enum) { + m["enum"] = append(enum, nil) + } + switch t := m["type"].(type) { + case string: + if t != "null" { + m["type"] = []any{t, "null"} + } + case []any: + if !containsNull(t) { + m["type"] = append(t, "null") + } + } +} + +func containsNull(items []any) bool { + for _, item := range items { + if item == nil { + return true + } + if s, ok := item.(string); ok && s == "null" { + return true + } + } + return false +} + func combinePromptForCodex(systemPrompt, workOrder string) string { if systemPrompt == "" { return workOrder diff --git a/internal/runner/codex_output_capture_test.go b/internal/runner/codex_output_capture_test.go index a05c598..c967eca 100644 --- a/internal/runner/codex_output_capture_test.go +++ b/internal/runner/codex_output_capture_test.go @@ -12,6 +12,8 @@ import ( "path/filepath" "strings" "testing" + + "github.com/shinpr/galley/schemas" ) func TestCodexCommandPlanMaterializesEmbeddedSchemaWhenOnlyContentAvailable(t *testing.T) { @@ -93,6 +95,59 @@ func TestCodexCommandPlanMaterializesEmbeddedSchemaWhenOnlyContentAvailable(t *t } } +func TestCodexCompatibleOutputSchemaRecursivelyRequiresObjectProperties(t *testing.T) { + t.Parallel() + body := CodexCompatibleOutputSchema(schemas.SetupResult) + var doc map[string]any + if err := json.Unmarshal([]byte(body), &doc); err != nil { + t.Fatalf("compatible setup schema is not valid JSON: %v", err) + } + + props := objectProp(t, doc, "properties") + topRequired := requiredSet(t, doc) + for name := range props { + if !topRequired[name] { + t.Fatalf("top-level setup schema property %q is not required", name) + } + } + successful := objectProp(t, props, "successful_commands") + if !typeAllowsNull(successful["type"]) { + t.Fatalf("optional successful_commands must be nullable for Codex strict schema: %#v", successful["type"]) + } + readiness := objectProp(t, props, "readiness_evidence") + if !typeAllowsNull(readiness["type"]) { + t.Fatalf("optional readiness_evidence must be nullable for Codex strict schema: %#v", readiness["type"]) + } + + commands := objectProp(t, props, "commands") + commandItems := objectProp(t, commands, "items") + commandProps := objectProp(t, commandItems, "properties") + commandRequired := requiredSet(t, commandItems) + for name := range commandProps { + if !commandRequired[name] { + t.Fatalf("commands.items property %q is not required", name) + } + } + why := objectProp(t, commandProps, "why") + if !typeAllowsNull(why["type"]) { + t.Fatalf("optional commands.items.why must be nullable: %#v", why["type"]) + } + stdout := objectProp(t, commandProps, "stdout_excerpt") + if !typeAllowsNull(stdout["type"]) { + t.Fatalf("optional commands.items.stdout_excerpt must be nullable: %#v", stdout["type"]) + } + + successItems := objectProp(t, successful, "items") + successRequired := requiredSet(t, successItems) + if !successRequired["why"] { + t.Fatalf("successful_commands.items.why must be required for Codex strict schema") + } + successWhy := objectProp(t, objectProp(t, successItems, "properties"), "why") + if !typeAllowsNull(successWhy["type"]) { + t.Fatalf("optional successful_commands.items.why must be nullable: %#v", successWhy["type"]) + } +} + func TestCodexCommandPlanReusesCallerSuppliedSchemaFile(t *testing.T) { t.Parallel() dir := t.TempDir() @@ -174,3 +229,43 @@ func flagValue(t *testing.T, argv []string, name string) string { } return "" } + +func objectProp(t *testing.T, parent map[string]any, name string) map[string]any { + t.Helper() + got, ok := parent[name].(map[string]any) + if !ok { + t.Fatalf("property %q is not an object: %#v", name, parent[name]) + } + return got +} + +func requiredSet(t *testing.T, schema map[string]any) map[string]bool { + t.Helper() + raw, ok := schema["required"].([]any) + if !ok { + t.Fatalf("schema missing required array: %#v", schema["required"]) + } + out := make(map[string]bool, len(raw)) + for _, item := range raw { + name, ok := item.(string) + if !ok { + t.Fatalf("required entry is not a string: %#v", item) + } + out[name] = true + } + return out +} + +func typeAllowsNull(raw any) bool { + switch value := raw.(type) { + case string: + return value == "null" + case []any: + for _, item := range value { + if item == "null" { + return true + } + } + } + return false +} From 42bf2c85b6e739d9a3fb494f83ea838aeaf7c5a1 Mon Sep 17 00:00:00 2001 From: Shinsuke Kagawa Date: Mon, 25 May 2026 22:16:13 +0900 Subject: [PATCH 6/6] Make PowerShell install smoke use local checkout --- .github/workflows/ci.yml | 2 +- scripts/install.ps1 | 49 +++++++++++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c869648..1a032a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -64,7 +64,7 @@ jobs: shell: powershell run: | $binDir = Join-Path $env:RUNNER_TEMP "galley-ps-bin" - powershell -NoProfile -ExecutionPolicy Bypass -File scripts/install.ps1 -BinDir $binDir + powershell -NoProfile -ExecutionPolicy Bypass -File scripts/install.ps1 -Local -BinDir $binDir & (Join-Path $binDir "galley.exe") --help | Out-Null - name: Validate examples diff --git a/scripts/install.ps1 b/scripts/install.ps1 index 9fc4fd9..82c6b0e 100644 --- a/scripts/install.ps1 +++ b/scripts/install.ps1 @@ -1,6 +1,7 @@ param( [string]$Version = $(if ($env:GALLEY_VERSION) { $env:GALLEY_VERSION } else { "latest" }), - [string]$BinDir = $(if ($env:GALLEY_BIN_DIR) { $env:GALLEY_BIN_DIR } else { Join-Path $HOME ".local\bin" }) + [string]$BinDir = $(if ($env:GALLEY_BIN_DIR) { $env:GALLEY_BIN_DIR } else { Join-Path $HOME ".local\bin" }), + [switch]$Local ) $ErrorActionPreference = "Stop" @@ -45,7 +46,29 @@ function Stop-ExistingDaemon { } } -if ($Version -eq "latest") { +function Install-Local { + param([string]$Dest) + + if (-not (Get-Command go -ErrorAction SilentlyContinue)) { + throw "go is required for -Local install" + } + + Stop-ExistingDaemon $Dest + Write-Host "Installing galley from local checkout into $BinDir" + + $previousGoBin = $env:GOBIN + try { + $env:GOBIN = $BinDir + & go install ./cmd/galley + if ($LASTEXITCODE -ne 0) { + throw "go install ./cmd/galley failed with exit code $LASTEXITCODE" + } + } finally { + $env:GOBIN = $previousGoBin + } +} + +if (-not $Local -and $Version -eq "latest") { $Version = Resolve-LatestVersion if (-not $Version) { throw "could not resolve latest Galley release" @@ -65,17 +88,21 @@ New-Item -ItemType Directory -Force $tmpDir | Out-Null try { New-Item -ItemType Directory -Force $BinDir | Out-Null - Write-Host "Downloading $url" - Invoke-WebRequest $url -OutFile $archive -UseBasicParsing + if ($Local) { + Install-Local $dest + } else { + Write-Host "Downloading $url" + Invoke-WebRequest $url -OutFile $archive -UseBasicParsing - tar.exe -xzf $archive -C $tmpDir galley.exe - $src = Join-Path $tmpDir "galley.exe" - if (-not (Test-Path $src)) { - throw "release asset did not contain galley.exe" - } + tar.exe -xzf $archive -C $tmpDir galley.exe + $src = Join-Path $tmpDir "galley.exe" + if (-not (Test-Path $src)) { + throw "release asset did not contain galley.exe" + } - Stop-ExistingDaemon $dest - Copy-Item $src $dest -Force + Stop-ExistingDaemon $dest + Copy-Item $src $dest -Force + } & $dest --help | Out-Null