diff --git a/.goreleaser.yml b/.goreleaser.yml index 9fc8536a2..87c74ef18 100644 --- a/.goreleaser.yml +++ b/.goreleaser.yml @@ -130,6 +130,19 @@ builds: ldflags: - -s -w + - id: sbx-bootstrap-linux + main: ./cmd/agentsh-sbx-bootstrap + binary: agentsh-sbx-bootstrap + env: + - CGO_ENABLED=0 + goos: + - linux + goarch: + - amd64 + - arm64 + ldflags: + - -s -w -X main.version={{.Version}} + # unixwrap: seccomp wrapper for Linux amd64 (requires CGO + libseccomp) - id: unixwrap-linux-amd64 main: ./cmd/agentsh-unixwrap @@ -307,10 +320,12 @@ nfpms: - unixwrap-linux-amd64 - unixwrap-linux-arm64 - stub-linux + - sbx-bootstrap-linux # NEW: bootstrap binary lands in /usr/bin formats: - deb - rpm - archlinux + - apk bindir: /usr/bin vendor: AgentSH homepage: https://github.com/agentsh/agentsh @@ -361,6 +376,72 @@ nfpms: dst: /usr/lib/agentsh/bash_startup.sh file_info: mode: 0755 + # Auto-wrap agent harness (paired with the Docker Sandboxes mixin kit). + - src: packaging/agent-wrap.sh + dst: /usr/lib/agentsh/agent-wrap + file_info: + mode: 0755 + - src: packaging/install-agent-wrappers.sh + dst: /usr/lib/agentsh/install-agent-wrappers.sh + file_info: + mode: 0755 + # Coding-agent policy template for Docker Sandboxes mixin bootstrap. + # Installed read-only — the bootstrap writes the merged result to + # /etc/agentsh/policies/default.yaml on each sandbox start. + - src: configs/policies/coding-agent.yaml + dst: /usr/share/agentsh/coding-agent.template.yaml + file_info: + mode: 0644 + + # Shim directory + symlinks (Docker Sandboxes mixin support). + # /usr/lib/agentsh/shims is prepended to PATH inside sandboxes via + # /etc/profile.d/agentsh.sh (written by the mixin kit's initFiles). 
+ - dst: /usr/lib/agentsh/shims + type: dir + file_info: + mode: 0755 + - dst: /usr/lib/agentsh/shims/bash + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/sh + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/curl + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/wget + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/pip + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/pip3 + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/npm + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/node + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/git + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/python + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/python3 + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/rm + src: /usr/bin/agentsh-shell-shim + type: symlink + + # Packaged policy reference (also lives in repo at docs/policy-reference.md). + - src: docs/policy-reference.md + dst: /usr/share/doc/agentsh/policy-reference.md + file_info: + mode: 0644 # Policy files - src: configs/policies/*.yaml dst: /etc/agentsh/policies/ @@ -388,6 +469,9 @@ nfpms: release: draft: false prerelease: auto + extra_files: + - glob: scripts/install-agentsh.sh + name_template: install.sh # Homebrew cask is published by the publish-homebrew-cask job in release.yml # (not managed by GoReleaser — the cask installs the signed DMG, not raw binaries) diff --git a/Makefile b/Makefile index 62c722529..54568096d 100644 --- a/Makefile +++ b/Makefile @@ -58,6 +58,9 @@ dns-test: docker build -f Dockerfile.dns-test -t agentsh-dns-test . 
docker run --rm --cap-add SYS_PTRACE agentsh-dns-test +sbx-e2e: + bash docker/sbx-kit/tests/run-e2e.sh + seccomp-probe: mkdir -p build GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o build/seccomp-probe ./cmd/seccomp-probe/ diff --git a/cmd/agentsh-sbx-bootstrap/daemon.go b/cmd/agentsh-sbx-bootstrap/daemon.go new file mode 100644 index 000000000..824402779 --- /dev/null +++ b/cmd/agentsh-sbx-bootstrap/daemon.go @@ -0,0 +1,56 @@ +package main + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "time" +) + +// spawnDaemon fork-execs `bin args...` with stdout/stderr appended to logPath. +// The child is detached; the returned *exec.Cmd lets the caller signal it if +// needed (in normal flow the bootstrap exits after probing and the daemon +// keeps running, reparented to PID 1). +func spawnDaemon(bin string, args []string, logPath string) (*exec.Cmd, error) { + if err := os.MkdirAll(filepath.Dir(logPath), 0o755); err != nil { + return nil, fmt.Errorf("mkdir log dir: %w", err) + } + logF, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644) + if err != nil { + return nil, fmt.Errorf("open log: %w", err) + } + cmd := exec.Command(bin, args...) + cmd.Stdout = logF + cmd.Stderr = logF + cmd.Env = os.Environ() + if err := cmd.Start(); err != nil { + logF.Close() + return nil, fmt.Errorf("start %s: %w", bin, err) + } + // Release the parent's reference to the log file FD now that exec(2) has + // dup'd stdio into the child. The child keeps its own dup'd FD. + _ = logF.Close() + return cmd, nil +} + +// waitForSocket polls for a filesystem entry at sockPath, returning nil as +// soon as it exists. Returns an error if the deadline elapses first. +// +// We check existence rather than `Dial` because the daemon may use a +// different socket type (gRPC vs HTTP) and a successful Dial isn't required +// to confirm "the daemon has started writing its socket" — only that the +// file exists. 
+func waitForSocket(sockPath string, deadline time.Duration) error { + end := time.Now().Add(deadline) + for time.Now().Before(end) { + if _, err := os.Stat(sockPath); err == nil { + return nil + } else if !errors.Is(err, os.ErrNotExist) { + return fmt.Errorf("stat socket %q: %w", sockPath, err) + } + time.Sleep(50 * time.Millisecond) + } + return fmt.Errorf("socket %q did not appear within %s", sockPath, deadline) +} diff --git a/cmd/agentsh-sbx-bootstrap/daemon_test.go b/cmd/agentsh-sbx-bootstrap/daemon_test.go new file mode 100644 index 000000000..94871e449 --- /dev/null +++ b/cmd/agentsh-sbx-bootstrap/daemon_test.go @@ -0,0 +1,69 @@ +package main + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" + "time" +) + +func TestSpawnDaemonAndWait_SocketAppears(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("unix sockets only") + } + dir := t.TempDir() + sock := filepath.Join(dir, "agentsh.sock") + + // Fake "daemon": a shell script that writes the socket file after a small + // delay. The bootstrap should observe it within the 2s window. 
+ fakeBin := filepath.Join(dir, "fake-agentsh") + script := "#!/bin/sh\n(sleep 0.1; touch '" + sock + "') &\nexec sleep 5\n" + if err := os.WriteFile(fakeBin, []byte(script), 0o755); err != nil { + t.Fatal(err) + } + + logPath := filepath.Join(dir, "bootstrap.log") + cmd, err := spawnDaemon(fakeBin, []string{"server"}, logPath) + if err != nil { + t.Fatalf("spawnDaemon: %v", err) + } + t.Cleanup(func() { _ = cmd.Process.Kill() }) + + if err := waitForSocket(sock, 2*time.Second); err != nil { + t.Fatalf("waitForSocket: %v", err) + } +} + +func TestWaitForSocket_TimesOut(t *testing.T) { + dir := t.TempDir() + sock := filepath.Join(dir, "nope.sock") + start := time.Now() + err := waitForSocket(sock, 200*time.Millisecond) + if err == nil { + t.Fatal("expected timeout error") + } + if elapsed := time.Since(start); elapsed > 1*time.Second { + t.Errorf("waitForSocket overshot deadline: %v", elapsed) + } +} + +func TestWaitForSocket_NonExistError(t *testing.T) { + // A path under a path component that is a regular file (not a directory) + // makes os.Stat return ENOTDIR, not ENOENT — exercises the new + // non-ErrNotExist branch. + dir := t.TempDir() + notADir := filepath.Join(dir, "file") + if err := os.WriteFile(notADir, []byte("x"), 0o644); err != nil { + t.Fatal(err) + } + sock := filepath.Join(notADir, "agentsh.sock") // /tmp/.../file/agentsh.sock — ENOTDIR + err := waitForSocket(sock, 200*time.Millisecond) + if err == nil { + t.Fatal("expected non-nil error for non-existent parent") + } + if !strings.Contains(err.Error(), "stat socket") { + t.Errorf("expected wrapped stat error, got: %v", err) + } +} diff --git a/cmd/agentsh-sbx-bootstrap/main.go b/cmd/agentsh-sbx-bootstrap/main.go new file mode 100644 index 000000000..c406426dc --- /dev/null +++ b/cmd/agentsh-sbx-bootstrap/main.go @@ -0,0 +1,78 @@ +// agentsh-sbx-bootstrap is the startup entrypoint installed into Docker +// Sandboxes by the AgentSH mixin kit. 
It merges the baked coding-agent +// policy with any user override, spawns the agentsh server, then probes +// the active enforcement tier and writes /run/agentsh/tier so the agent's +// SKILL.md can read it. +package main + +import ( + "flag" + "fmt" + "os" + "time" +) + +const ( + defaultTemplatePath = "/usr/share/agentsh/coding-agent.template.yaml" + defaultOverlayPath = "/home/agent/.agentsh/policy.yaml" + defaultPolicyPath = "/etc/agentsh/policies/default.yaml" + defaultTierPath = "/run/agentsh/tier" + // defaultBootstrapLog: target for the future bootstrap banner / tier probe + // log. v1 writes those to stderr; the constant reserves the path so + // installers, doc tooling, and Task 5 can reference a single source of truth. + defaultBootstrapLog = "/var/log/agentsh/bootstrap.log" + defaultDaemonLog = "/var/log/agentsh/daemon.log" + defaultAgentshBin = "/usr/bin/agentsh" + defaultServerConfig = "/etc/agentsh/config.yaml" + defaultDaemonSocket = "/run/agentsh/agentsh.sock" + defaultSocketTimeout = 2 * time.Second + defaultShimDir = "/usr/lib/agentsh/shims" +) + +func main() { + var ( + tmpl = flag.String("template", defaultTemplatePath, "Baked policy template path") + overlay = flag.String("overlay", defaultOverlayPath, "User override fragment path") + policy = flag.String("policy", defaultPolicyPath, "Output merged policy path") + agentshBin = flag.String("agentsh", defaultAgentshBin, "Path to the agentsh binary") + srvConfig = flag.String("server-config", defaultServerConfig, "Path to the agentsh server config") + sock = flag.String("socket", defaultDaemonSocket, "Daemon socket path to poll for readiness") + daemonLog = flag.String("daemon-log", defaultDaemonLog, "Path to daemon log file") + ) + flag.Parse() + + if err := mergeAndWritePolicy(*tmpl, *overlay, *policy); err != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: policy merge failed: %v\n", err) + os.Exit(1) + } + + if _, err := spawnDaemon(*agentshBin, []string{"server", "--config", 
*srvConfig}, *daemonLog); err != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: spawn daemon: %v\n", err) + os.Exit(1) + } + + if err := waitForSocket(*sock, defaultSocketTimeout); err != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: %v (continuing with degraded tier)\n", err) + // Don't exit. NOTE: the shim probe below only checks PATH; it ignores the daemon. + } + + shimDir := defaultShimDir + if env := os.Getenv("AGENTSH_SHIM_DIR"); env != "" { + shimDir = env + } + + tier := "none" + if ok, resolved, probeErr := probeShimTier(shimDir); probeErr != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: shim probe failed: %v\n", probeErr) + } else if ok { + tier = "shim" + fmt.Fprintf(os.Stdout, "agentsh-sbx-bootstrap: shim tier active (curl -> %s)\n", resolved) + } else { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: shim tier NOT active (PATH did not yield %s)\n", shimDir) + } + + if err := writeTierFile(defaultTierPath, tier); err != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: write tier file: %v\n", err) + os.Exit(1) + } +} diff --git a/cmd/agentsh-sbx-bootstrap/policy.go b/cmd/agentsh-sbx-bootstrap/policy.go new file mode 100644 index 000000000..542e2a112 --- /dev/null +++ b/cmd/agentsh-sbx-bootstrap/policy.go @@ -0,0 +1,68 @@ +package main + +import ( + "errors" + "fmt" + "os" + "path/filepath" + + "github.com/agentsh/agentsh/internal/policy" + "gopkg.in/yaml.v3" +) + +// mergeAndWritePolicy reads the baked template at `tmpl`, reads the optional +// user override at `overlay` (any read or parse failure is logged to stderr +// and treated as "no overlay"), merges them via policy.MergeOverlay, and +// writes the result atomically to `out` via a temp file + rename. +// +// Fails if the template can't be read/parsed or the result can't be marshalled/written. +// A missing/broken overlay is intentionally non-fatal: the template alone is +// always a safe fallback and the bootstrap is required to fail-open. 
+func mergeAndWritePolicy(tmpl, overlay, out string) error { + tmplBytes, err := os.ReadFile(tmpl) + if err != nil { + return fmt.Errorf("read template: %w", err) + } + base, err := policy.LoadFromBytes(tmplBytes) + if err != nil { + return fmt.Errorf("parse template: %w", err) + } + + var ov *policy.Policy + if overlay != "" { + ovBytes, ovErr := os.ReadFile(overlay) + switch { + case errors.Is(ovErr, os.ErrNotExist): + // No override file: fine. Bare template wins. + case ovErr != nil: + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: read overlay %q: %v (falling back to template only)\n", overlay, ovErr) + default: + parsed, pErr := policy.LoadFromBytes(ovBytes) + if pErr != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: parse overlay %q: %v (falling back to template only)\n", overlay, pErr) + } else { + ov = parsed + } + } + } + + merged := policy.MergeOverlay(base, ov) + + mergedYAML, err := yaml.Marshal(merged) + if err != nil { + return fmt.Errorf("marshal merged policy: %w", err) + } + + if err := os.MkdirAll(filepath.Dir(out), 0o755); err != nil { + return fmt.Errorf("mkdir output dir: %w", err) + } + tmp := out + ".tmp" + if err := os.WriteFile(tmp, mergedYAML, 0o644); err != nil { + return fmt.Errorf("write tmp: %w", err) + } + if err := os.Rename(tmp, out); err != nil { + _ = os.Remove(tmp) + return fmt.Errorf("rename: %w", err) + } + return nil +} diff --git a/cmd/agentsh-sbx-bootstrap/policy_test.go b/cmd/agentsh-sbx-bootstrap/policy_test.go new file mode 100644 index 000000000..366d786b9 --- /dev/null +++ b/cmd/agentsh-sbx-bootstrap/policy_test.go @@ -0,0 +1,137 @@ +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +const baseTemplate = ` +version: 1 +name: coding-agent +file_rules: + - name: allow-tmp + paths: ["/tmp/**"] + operations: ["*"] + decision: allow +` + +func TestMergeAndWritePolicy_NoOverlay(t *testing.T) { + dir := t.TempDir() + tmpl := filepath.Join(dir, "tmpl.yaml") + overlay := filepath.Join(dir, 
"overlay.yaml") + out := filepath.Join(dir, "out.yaml") + + if err := os.WriteFile(tmpl, []byte(baseTemplate), 0644); err != nil { + t.Fatal(err) + } + if err := mergeAndWritePolicy(tmpl, overlay, out); err != nil { + t.Fatalf("mergeAndWritePolicy: %v", err) + } + got, err := os.ReadFile(out) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(got), "allow-tmp") { + t.Errorf("expected output to contain base rules; got: %s", got) + } +} + +func TestMergeAndWritePolicy_WithOverlay(t *testing.T) { + dir := t.TempDir() + tmpl := filepath.Join(dir, "tmpl.yaml") + overlay := filepath.Join(dir, "overlay.yaml") + out := filepath.Join(dir, "out.yaml") + + if err := os.WriteFile(tmpl, []byte(baseTemplate), 0644); err != nil { + t.Fatal(err) + } + overlayBody := ` +version: 1 +name: user-overlay +file_rules: + - name: allow-extra + paths: ["/data/**"] + operations: ["*"] + decision: allow +` + if err := os.WriteFile(overlay, []byte(overlayBody), 0644); err != nil { + t.Fatal(err) + } + if err := mergeAndWritePolicy(tmpl, overlay, out); err != nil { + t.Fatalf("mergeAndWritePolicy: %v", err) + } + got, err := os.ReadFile(out) + if err != nil { + t.Fatal(err) + } + body := string(got) + if !strings.Contains(body, "allow-tmp") { + t.Error("expected base rule allow-tmp in merged output") + } + if !strings.Contains(body, "allow-extra") { + t.Error("expected overlay rule allow-extra in merged output") + } +} + +func TestMergeAndWritePolicy_BadOverlayFallsBackToTemplate(t *testing.T) { + dir := t.TempDir() + tmpl := filepath.Join(dir, "tmpl.yaml") + overlay := filepath.Join(dir, "overlay.yaml") + out := filepath.Join(dir, "out.yaml") + + if err := os.WriteFile(tmpl, []byte(baseTemplate), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(overlay, []byte("not: [valid: yaml"), 0644); err != nil { + t.Fatal(err) + } + if err := mergeAndWritePolicy(tmpl, overlay, out); err != nil { + t.Fatalf("mergeAndWritePolicy should not error on bad overlay: %v", err) + } 
+ got, err := os.ReadFile(out) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(got), "allow-tmp") { + t.Error("expected fallback to template-only on bad overlay") + } +} + +func TestMergeAndWritePolicy_MissingTemplateErrors(t *testing.T) { + dir := t.TempDir() + tmpl := filepath.Join(dir, "nonexistent.yaml") + overlay := filepath.Join(dir, "overlay.yaml") + out := filepath.Join(dir, "out.yaml") + + err := mergeAndWritePolicy(tmpl, overlay, out) + if err == nil { + t.Fatal("expected error when template is missing") + } +} + +func TestMergeAndWritePolicy_AtomicWrite(t *testing.T) { + // If the destination already exists with content X, and the merge succeeds, + // the file should contain the new content (i.e. rename, not append). + dir := t.TempDir() + tmpl := filepath.Join(dir, "tmpl.yaml") + out := filepath.Join(dir, "out.yaml") + + if err := os.WriteFile(tmpl, []byte(baseTemplate), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(out, []byte("stale: content\n"), 0644); err != nil { + t.Fatal(err) + } + if err := mergeAndWritePolicy(tmpl, "", out); err != nil { + t.Fatal(err) + } + got, err := os.ReadFile(out) + if err != nil { + t.Fatal(err) + } + if strings.Contains(string(got), "stale") { + t.Error("expected stale content to be replaced") + } +} diff --git a/cmd/agentsh-sbx-bootstrap/tier.go b/cmd/agentsh-sbx-bootstrap/tier.go new file mode 100644 index 000000000..5cbe70a6d --- /dev/null +++ b/cmd/agentsh-sbx-bootstrap/tier.go @@ -0,0 +1,67 @@ +package main + +import ( + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" +) + +// probeShimTier runs a tiny shell that first sources the kit's PATH-injection +// hook at /etc/profile.d/agentsh.sh (if present) and then resolves curl. +// Returns (ok, resolvedPath, err). `ok=true` when the resolved curl lives +// under shimDir; `ok=false` means either curl is absent or the system curl +// is winning over the shim (i.e. the kit's PATH wiring isn't effective). 
+// A non-nil error means the probe couldn't be run at all (e.g. /bin/sh +// missing). +// +// Sourcing profile.d explicitly matters because the bootstrap typically runs +// in a non-login shell (the Docker Sandboxes `startup` phase), so PATH +// modifications written to /etc/profile.d/ are not picked up by `/bin/sh` +// out of the box. The probe must verify the agent's eventual PATH, not the +// bootstrap's own PATH at invocation time. +func probeShimTier(shimDir string) (bool, string, error) { + // Guard the `dot` source with `[ -r ... ]` rather than `2>/dev/null || true`: + // in POSIX mode (which bash-as-/bin/sh enters), a missing-file failure from + // the special builtin `.` aborts the shell before `|| true` can rescue it, + // turning a no-op into a probe failure. The bracket test sidesteps that. + const script = "[ -r /etc/profile.d/agentsh.sh ] && . /etc/profile.d/agentsh.sh; command -v curl" + cmd := exec.Command("/bin/sh", "-c", script) + out, err := cmd.Output() + if err != nil { + // `command -v curl` exits 1 when curl isn't found; that's not an error + // for our purposes — it just means the shim tier didn't apply. + var exitErr *exec.ExitError + if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 { + return false, "", nil + } + return false, "", fmt.Errorf("probe: %w", err) + } + resolved := strings.TrimSpace(string(out)) + if resolved == "" { + return false, "", nil + } + clean := filepath.Clean(shimDir) + return strings.HasPrefix(resolved, clean+string(filepath.Separator)) || resolved == clean, resolved, nil +} + +// writeTierFile writes the active tier name (e.g. "shim" or "none") followed +// by a trailing newline to path. Atomic via tmp+rename so concurrent readers +// (the SKILL.md tells the agent to `cat` this file) never see a half-written +// value. Creates parent dirs with mode 0755. 
+func writeTierFile(path, tier string) error { + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return fmt.Errorf("mkdir tier dir: %w", err) + } + tmp := path + ".tmp" + if err := os.WriteFile(tmp, []byte(tier+"\n"), 0o644); err != nil { + return fmt.Errorf("write tmp: %w", err) + } + if err := os.Rename(tmp, path); err != nil { + _ = os.Remove(tmp) + return fmt.Errorf("rename: %w", err) + } + return nil +} diff --git a/cmd/agentsh-sbx-bootstrap/tier_test.go b/cmd/agentsh-sbx-bootstrap/tier_test.go new file mode 100644 index 000000000..a81e801e7 --- /dev/null +++ b/cmd/agentsh-sbx-bootstrap/tier_test.go @@ -0,0 +1,76 @@ +package main + +import ( + "errors" + "os" + "os/exec" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestProbeShimTier_DetectsShimOnPath(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("shell-based probe is POSIX only") + } + dir := t.TempDir() + shimDir := filepath.Join(dir, "shims") + if err := os.Mkdir(shimDir, 0o755); err != nil { + t.Fatal(err) + } + // Place a fake `curl` executable in the shim dir. + fakeCurl := filepath.Join(shimDir, "curl") + if err := os.WriteFile(fakeCurl, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatal(err) + } + + // Inject the shim dir at the front of PATH for the probe. + t.Setenv("PATH", shimDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + ok, resolved, err := probeShimTier(shimDir) + if err != nil { + t.Fatalf("probeShimTier: %v", err) + } + if !ok { + t.Errorf("expected probe to detect shim; resolved=%q", resolved) + } + if !strings.HasPrefix(resolved, shimDir) { + t.Errorf("resolved %q should be under shim dir %q", resolved, shimDir) + } +} + +func TestProbeShimTier_RejectsRealCurl(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("shell-based probe is POSIX only") + } + // Don't put any shim on PATH. The system curl (if present) should NOT + // match the shim dir, so the probe returns false. 
+ t.Setenv("PATH", "/usr/bin:/bin") + ok, _, err := probeShimTier("/nonexistent/shims") + if err != nil { + var ee *exec.ExitError + if !errors.As(err, &ee) || ee.ExitCode() != 1 { + t.Fatalf("unexpected probe error: %v", err) + } + // exit 1 = "curl not found"; acceptable on hosts without curl + } + if ok { + t.Errorf("expected probe to NOT detect shim when only /usr/bin/curl is reachable") + } +} + +func TestWriteTierFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tier") + if err := writeTierFile(path, "shim"); err != nil { + t.Fatalf("writeTierFile: %v", err) + } + got, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + if string(got) != "shim\n" { + t.Errorf("tier file = %q, want %q", got, "shim\n") + } +} diff --git a/configs/policies/coding-agent.yaml b/configs/policies/coding-agent.yaml new file mode 100644 index 000000000..48a7847ae --- /dev/null +++ b/configs/policies/coding-agent.yaml @@ -0,0 +1,208 @@ +# Coding-agent policy for AgentSH inside Docker Sandboxes. +# This is the baked-in template the agentsh-sbx-bootstrap binary merges with +# any user override at /home/agent/.agentsh/policy.yaml on every sandbox start. +# +# Reference: /usr/share/doc/agentsh/policy-reference.md +# To extend: write rules to /home/agent/.agentsh/policy.yaml; the bootstrap +# merges them on top of this file (user wins on name collision; otherwise +# rules concatenate in declared order). + +version: 1 +name: coding-agent +description: | + Default policy for AI coding agents (Claude Code, OpenCode, Gemini CLI) + running inside Docker Sandboxes. Tuned for path/command granularity inside + the sandbox; outbound network controls are handled by the Docker Sandbox + proxy and intentionally not duplicated here. + +# ============================================================================= +# FILE RULES — evaluated in order, first match wins. 
+# ============================================================================= +file_rules: + + # ---- Sensitive credential paths: deny before any allow-home rule matches. + - name: deny-credential-paths + description: Block reads/writes of host credentials that may have leaked into the sandbox. + paths: + - "/home/**/.ssh/**" + - "/home/**/.aws/**" + - "/home/**/.gnupg/**" + - "/home/**/.kube/**" + - "/home/**/.docker/config.json" + - "/home/**/.netrc" + - "/home/**/.config/gcloud/**" + - "/home/**/.config/gh/**" + - "/home/**/.config/git-credentials" + - "/root/.ssh/**" + - "/root/.aws/**" + - "/root/.gnupg/**" + - "/root/.kube/**" + - "/root/.netrc" + - "/root/.docker/config.json" + - "/root/.config/gcloud/**" + - "/root/.config/gh/**" + - "/root/.config/git-credentials" + operations: ["*"] + decision: deny + message: "Access to credential path {{.Path}} is denied by the coding-agent policy." + + # ---- AgentSH self-protection: agent cannot edit its own policy/logs/binaries. + - name: deny-self-write + description: Prevent the agent from tampering with AgentSH state. + paths: + - "/etc/agentsh/**" + - "/usr/lib/agentsh/**" + - "/usr/share/agentsh/**" + - "/run/agentsh/**" + - "/var/lib/agentsh/**" + - "/var/log/agentsh/**" + operations: [write, create, mkdir, chmod, rename, delete, rmdir] + decision: deny + message: "Write to AgentSH-controlled path {{.Path}} is denied." + + # ---- Workspace: full read/write; deletes are soft so rm -rf is recoverable. + - name: allow-workspace-read + paths: ["/workspace", "/workspace/**"] + operations: [read, open, stat, list, readlink] + decision: allow + + - name: allow-workspace-write + paths: ["/workspace", "/workspace/**"] + operations: [write, create, mkdir, chmod, rename] + decision: allow + + - name: soft-delete-workspace + description: Soft-delete workspace files (recoverable via /var/lib/agentsh/trash). 
+ paths: ["/workspace", "/workspace/**"] + operations: [delete, rmdir] + decision: soft_delete + message: "File quarantined (recoverable): {{.Path}}" + + # ---- Package manager caches: full allow (routine for coding work). + - name: allow-package-caches + paths: + - "/home/**/.npm/**" + - "/home/**/.cache/pip/**" + - "/home/**/.cargo/**" + - "/home/**/.cache/go-build/**" + - "/home/**/.rustup/**" + - "/home/**/.gradle/caches/**" + - "/home/**/.m2/**" + - "/root/.npm/**" + - "/root/.cache/pip/**" + - "/root/.cargo/**" + - "/root/.cache/go-build/**" + - "/root/.rustup/**" + - "/root/.gradle/caches/**" + - "/root/.m2/**" + operations: ["*"] + decision: allow + + # ---- Home: read/write everywhere except the credential paths denied above. + - name: allow-home + paths: ["/home/**", "/root/**"] + operations: ["*"] + decision: allow + + # ---- Tmp: full access. + - name: allow-tmp + paths: ["/tmp/**", "/var/tmp/**"] + operations: ["*"] + decision: allow + + # ---- System paths: read-only allow. + - name: allow-system-read + paths: + - "/usr/**" + - "/lib/**" + - "/lib64/**" + - "/bin/**" + - "/sbin/**" + - "/opt/**" + operations: [read, open, stat, list, readlink] + decision: allow + + - name: allow-etc-read-safe + paths: + - "/etc/hosts" + - "/etc/resolv.conf" + - "/etc/ssl/**" + - "/etc/ca-certificates/**" + - "/etc/localtime" + - "/etc/timezone" + - "/etc/mime.types" + - "/etc/protocols" + - "/etc/services" + - "/etc/environment" + - "/etc/environment.d/**" + - "/etc/profile.d/**" + operations: [read, open, stat] + decision: allow + +# ============================================================================= +# COMMAND RULES +# ============================================================================= +# curl|sh patterns are caught by the shellc-opaque-script layer; a future +# audit/redirect rule (to agentsh-fetch) will be added in v1.1. 
+command_rules: + + - name: deny-privilege-escalation + description: The Docker Sandbox already pins the agent to a fixed user; escalation is suspicious. + commands: [sudo, su, doas] + decision: deny + message: "Privilege escalation via {{.Command}} is denied inside a Docker Sandbox." + + - name: approve-recursive-chmod + description: Require approval for chmod -R on / or /home, or chmod 777. + commands: [chmod] + args_patterns: + - "^-R\\s+/$" + - "^-R\\s+/home.*" + - ".*777.*" + decision: approve + message: "Recursive or world-writable chmod requested: chmod {{.Args}}" + + - name: audit-package-installers + description: "Routine for coding agents; audit-log all package manager invocations." + commands: [pip, pip3, npm, yarn, pnpm, cargo, apt, apt-get, gem, bundle] + decision: audit + +# ============================================================================= +# SIGNAL RULES +# ============================================================================= +signal_rules: + + - name: deny-signal-pid1 + description: The agent must not signal PID 1. + signals: ["@fatal", "@job"] + target: + type: pid_range + min: 1 + max: 1 + decision: deny + message: "Signaling PID 1 is denied." + + - name: deny-signal-agentsh + description: The agent must not signal AgentSH processes. + signals: ["@fatal", "@job"] + target: + type: external + pattern: "agentsh*" + decision: deny + message: "Signaling AgentSH is denied." + + - name: allow-signal-own-tree + description: Allow signals within the agent's own subprocess tree. 
+ signals: ["@fatal", "@job"] + target: + type: children + decision: allow + +# ============================================================================= +# AUDIT +# ============================================================================= +audit: + log_allowed: false + log_denied: true + log_approved: true + retention_days: 7 diff --git a/docker/sbx-kit/README.md b/docker/sbx-kit/README.md new file mode 100644 index 000000000..286224788 --- /dev/null +++ b/docker/sbx-kit/README.md @@ -0,0 +1,136 @@ +# AgentSH mixin kit for Docker Sandboxes + +This is a [Docker Sandboxes mixin kit](https://docs.docker.com/ai/sandboxes/customize/kits/) +that installs [AgentSH](https://github.com/erans/agentsh) into any sandbox at +creation and routes the agent's command-level activity through a +coding-agent-tuned policy. + +## Use + +``` +sbx run --kit git+https://github.com/erans/agentsh.git#dir=docker/sbx-kit +``` + +Works with `claude`, `opencode`, `gemini`, and any agent kit derived from +`docker/sandbox-templates:shell-docker`. + +## Verify + +``` +sbx exec cat /run/agentsh/tier # expect: shim +sbx exec cat /etc/agentsh/policies/default.yaml +sbx exec pgrep -af 'agentsh server' +``` + +For a deeper smoke test, run `tests/coding-agent-smoke.sh` inside the +sandbox. + +## OpenCode / Gemini setup + +Claude Code auto-discovers `.claude/skills/agentsh/SKILL.md`. For other +agents, copy the SKILL into your agent's discovery path: + +``` +sbx exec cp /workspace/.claude/skills/agentsh/SKILL.md /workspace/AGENTS.md +``` + +(Or symlink, or merge with your own `AGENTS.md` — whatever fits your flow.) + +## Logs + +| File | Purpose | +|---|---| +| `/var/log/agentsh/bootstrap.log` | Startup banner, policy-merge result, tier-probe result | +| `/var/log/agentsh/daemon.log` | Daemon stdout+stderr | + +## v1 enforcement tier + +v1 ships shim-tier interception only: subprocess execs of common commands +are routed through AgentSH's shim binary. 
LD_PRELOAD and ptrace tiers are +planned (see the spec under +`docs/superpowers/specs/2026-05-11-docker-sandboxes-mixin-kit-design.md`). + +## Behavior: agent harness runs under `agentsh wrap` + +This kit runs the agent harness under `agentsh wrap` whenever it can. After +install, the kit discovers each known agent binary on PATH (the same way the +agent kit's entrypoint resolves it), renames it to `<agent>.real`, and drops a +symlink to `/usr/lib/agentsh/agent-wrap` at the original location. The agent +kit's entrypoint then resolves to our wrapper, which engages `agentsh wrap` +before exec'ing the moved-aside real binary. + +This gives you full exec-pipeline interception of every subprocess the agent +spawns, a coherent session, and a session report on exit. + +Wrapped agents (v1): `claude`, `opencode`, `gemini`, `codex`, `cursor`. The +installer skips agents whose binary is not on PATH and silently skips agents +that are already correctly wrapped (idempotent re-run). If a foreign +`<agent>.real` already exists but `<agent>` is not our symlink, the installer +refuses to overwrite and emits a stderr warning. + +### Fail-CLOSED deviation from the parent spec + +When the wrapper at `<agent>` runs, it exits non-zero and +refuses to launch the agent if AgentSH cannot engage cleanly: the `agentsh` +binary is missing, `/run/agentsh/tier` does not read `shim`, or the tier +file is missing. Choosing this kit means choosing enforcement-mandatory +semantics; running unenforced is not a supported state. + +This deviates from the parent spec's §7 "never bricks the sandbox" stance. +The parent spec governs the kit's *bootstrap*; this section governs the +wrapper's behavior at *agent launch time*. + +### Known limitations + +- **Uninstall is non-trivial.** Removing the kit no longer just removes a + symlink — the agent binary's original location is now occupied by a + symlink, and the real binary lives at `<agent>.real`. 
Clean recovery + requires removing the symlink and renaming `<agent-path>.real` back to + `<agent-path>` for each wrapped agent. There is no automated uninstall yet. +- **Install-time failures pass through.** If the kit's `install` command + itself fails (curl 404, package install error), the wrappers are never + created and the agent runs unwrapped. `sbx run` should report this + failure visibly. + +## E2E test (no `sbx` required) + +`tests/run-e2e.sh` (or `make sbx-e2e` from the repo root) exercises the kit's +mechanics against the public `docker/sandbox-templates:shell-docker` image — +no Docker Sandboxes install needed. It: + +1. Builds `agentsh-shell-shim` and `agentsh-sbx-bootstrap` on the host (CGO off). +2. Starts a container from the sandbox template. +3. Lays down the same payload `sbx run --kit` would (binaries, policy template, + `/usr/lib/agentsh/shims/*` symlinks, `/etc/profile.d/agentsh.sh`, + `/etc/environment.d/10-agentsh.conf`, the kit's `files/` tree, plus a user + override fragment crafted to exercise both replace-by-name and append). +4. Runs `/usr/bin/agentsh-sbx-bootstrap`. +5. Verifies: `/run/agentsh/tier == shim`; `command -v curl` resolves under + `/usr/lib/agentsh/shims/`; `/etc/agentsh/policies/default.yaml` is the + merged policy (baked rule present; appended override rule present; + replace-by-name overlay paths win); SKILL.md and override stub landed. + +What it does **not** verify (still gated on a real `sbx run` against a +tagged release): + +- The `install` step actually downloading `install.sh` and the matching + `.deb`/`.rpm`/`.apk` from the GitHub release. +- In-sandbox enforcement (deny / audit / soft_delete) — that needs the + `agentsh server` running, which depends on libseccomp; out of scope for + v1 E2E. Run `tests/coding-agent-smoke.sh` inside a real sandbox for that. +- That the agent kit's actual entrypoint inherits the shim PATH. + +## Override the policy + +Write a partial YAML policy to `/home/agent/.agentsh/policy.yaml` inside the +sandbox. 
See `/usr/share/doc/agentsh/policy-reference.md` for the grammar. +Restart the sandbox to apply. + +## Note on smoke test permissions + +If `tests/coding-agent-smoke.sh` loses its executable bit post-checkout (git +may not preserve file modes on some platforms), restore it with: + +``` +chmod +x docker/sbx-kit/tests/coding-agent-smoke.sh +``` diff --git a/docker/sbx-kit/files/home/agent/.agentsh/policy.yaml b/docker/sbx-kit/files/home/agent/.agentsh/policy.yaml new file mode 100644 index 000000000..9a2e3e132 --- /dev/null +++ b/docker/sbx-kit/files/home/agent/.agentsh/policy.yaml @@ -0,0 +1,18 @@ +# AgentSH user-override fragment. +# +# Anything you write here merges on top of the baked coding-agent policy at +# /usr/share/agentsh/coding-agent.template.yaml on the next sandbox start. +# Rules that share a `name` with a baked rule replace it; rules with new +# names append after the baked set. +# +# Reference: /usr/share/doc/agentsh/policy-reference.md +# +# Example (uncomment to use): +# +# version: 1 +# name: my-overrides +# file_rules: +# - name: allow-extra-write-area +# paths: ["/data/**"] +# operations: [write, create, mkdir, rename] +# decision: allow diff --git a/docker/sbx-kit/files/workspace/.claude/skills/agentsh/SKILL.md b/docker/sbx-kit/files/workspace/.claude/skills/agentsh/SKILL.md new file mode 100644 index 000000000..13d876efb --- /dev/null +++ b/docker/sbx-kit/files/workspace/.claude/skills/agentsh/SKILL.md @@ -0,0 +1,60 @@ +--- +name: agentsh +description: Use when the user asks about AgentSH policy, sandbox enforcement, audit events, or what file/network/command operations are allowed inside this Docker Sandbox. Read /run/agentsh/tier for the active enforcement mode, /etc/agentsh/policies/default.yaml for the merged active policy, and /home/agent/.agentsh/policy.yaml for the user-overlay fragment. +--- + +# AgentSH in this sandbox + +This sandbox has AgentSH installed via the Docker Sandboxes mixin kit. 
It +enforces a policy on file, network, command, and signal operations performed +by you and your subprocesses. + +## Inspect the live state + +| Question | Run | +|---|---| +| What enforcement tier is active? | `cat /run/agentsh/tier` (one of `shim`, `none`) | +| What policy is being enforced right now? | `cat /etc/agentsh/policies/default.yaml` | +| What are my overrides on top of the baked policy? | `cat /home/agent/.agentsh/policy.yaml` | +| Is the daemon running? | `pgrep -af 'agentsh server'` | +| Full grammar reference | `cat /usr/share/doc/agentsh/policy-reference.md` | + +## Extend the policy + +Write a partial YAML policy to `/home/agent/.agentsh/policy.yaml`. The +bootstrap merges it on top of the baked `coding-agent` template on the next +sandbox start. Rules that share a `name` with a baked rule replace it; +rules with new names append. + +Minimal example: + +```yaml +version: 1 +name: my-overrides +file_rules: + - name: allow-data-area + paths: ["/data/**"] + operations: [write, create, mkdir, rename] + decision: allow +``` + +Restart the sandbox via Docker Sandboxes to pick up the change. In-place +reload is not supported in v1. + +## Common patterns + +- Let the agent write outside `/workspace`: add a `file_rules` entry with `decision: allow` for the new paths. +- Block a command unconditionally: add a `command_rules` entry with `decision: deny`. +- Soft-delete instead of hard-delete on a path: `decision: soft_delete` in a `file_rules` entry for `delete`/`rmdir` operations. +- Audit (don't block) a pattern: `decision: audit`. + +For the full grammar — every field, every decision value, available +templating variables — read `/usr/share/doc/agentsh/policy-reference.md`. + +## When the tier is `none` + +That means the bootstrap couldn't confirm the shim PATH made it past the +agent's entrypoint, OR the daemon failed to start. Check +`/var/log/agentsh/bootstrap.log` and `/var/log/agentsh/daemon.log` for the +reason. 
The agent will continue to run — AgentSH never blocks the agent's +startup — but enforcement is degraded to advisory. diff --git a/docker/sbx-kit/spec.yaml b/docker/sbx-kit/spec.yaml new file mode 100644 index 000000000..766ec82d3 --- /dev/null +++ b/docker/sbx-kit/spec.yaml @@ -0,0 +1,33 @@ +# AgentSH mixin kit for Docker Sandboxes. +# See docs/superpowers/specs/2026-05-11-docker-sandboxes-mixin-kit-design.md +# Invoke: sbx run --kit git+https://github.com/erans/agentsh.git#dir=docker/sbx-kit + +schemaVersion: "1" +kind: mixin +name: agentsh +displayName: AgentSH +description: Policy-enforced execution gateway for AI coding agents + +commands: + install: + - command: "/bin/sh -c 'curl -fsSL https://github.com/erans/agentsh/releases/latest/download/install.sh | sh'" + user: "0" + description: Install agentsh from the latest GitHub release + - command: "/usr/lib/agentsh/install-agent-wrappers.sh" + user: "0" + description: Wrap detected agent binaries via /usr/local/bin/ symlinks + + initFiles: + - path: /etc/profile.d/agentsh.sh + content: 'export PATH=/usr/lib/agentsh/shims:$PATH' + mode: "0644" + + - path: /etc/environment.d/10-agentsh.conf + content: 'PATH=/usr/lib/agentsh/shims:/usr/local/bin:/usr/bin:/bin' + mode: "0644" + + startup: + - command: ["/usr/bin/agentsh-sbx-bootstrap"] + user: "0" + background: true + description: Merge policy, start agentsh server, probe enforcement tier diff --git a/docker/sbx-kit/spec_test.go b/docker/sbx-kit/spec_test.go new file mode 100644 index 000000000..f795dab63 --- /dev/null +++ b/docker/sbx-kit/spec_test.go @@ -0,0 +1,149 @@ +// Package sbxkit hosts a structural test for spec.yaml so a fresh engineer +// can't break the manifest format without CI catching it. 
+package sbxkit + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "gopkg.in/yaml.v3" +) + +type kitSpec struct { + SchemaVersion string `yaml:"schemaVersion"` + Kind string `yaml:"kind"` + Name string `yaml:"name"` + DisplayName string `yaml:"displayName"` + Description string `yaml:"description"` + Commands kitCmds `yaml:"commands"` +} + +type kitCmds struct { + Install []kitInstall `yaml:"install"` + InitFiles []kitInitFile `yaml:"initFiles"` + Startup []kitStartup `yaml:"startup"` +} + +type kitInstall struct { + Command string `yaml:"command"` + User string `yaml:"user"` + Description string `yaml:"description"` +} + +type kitInitFile struct { + Path string `yaml:"path"` + Content string `yaml:"content"` + Mode string `yaml:"mode"` +} + +type kitStartup struct { + Command []string `yaml:"command"` + User string `yaml:"user"` + Background bool `yaml:"background"` + Description string `yaml:"description"` +} + +func loadSpec(t *testing.T) *kitSpec { + t.Helper() + path := filepath.Join("spec.yaml") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read spec.yaml: %v", err) + } + var s kitSpec + if err := yaml.Unmarshal(data, &s); err != nil { + t.Fatalf("parse spec.yaml: %v", err) + } + return &s +} + +func TestSpecYAML_TopLevel(t *testing.T) { + s := loadSpec(t) + if s.SchemaVersion != "1" { + t.Errorf("schemaVersion = %q, want %q", s.SchemaVersion, "1") + } + if s.Kind != "mixin" { + t.Errorf("kind = %q, want %q", s.Kind, "mixin") + } + if s.Name != "agentsh" { + t.Errorf("name = %q, want %q", s.Name, "agentsh") + } +} + +func TestSpecYAML_InstallReferencesInstallScript(t *testing.T) { + s := loadSpec(t) + if len(s.Commands.Install) != 2 { + t.Fatalf("expected exactly two install commands, got %d", len(s.Commands.Install)) + } + + // First entry: curl install.sh | sh + first := s.Commands.Install[0].Command + if !strings.Contains(first, "install.sh") { + t.Errorf("first install command does not curl install.sh: %q", first) + } + if 
s.Commands.Install[0].User != "0" { + t.Errorf("first install user = %q, want %q (root)", s.Commands.Install[0].User, "0") + } + + // Second entry: install-agent-wrappers.sh + second := s.Commands.Install[1].Command + if !strings.Contains(second, "install-agent-wrappers.sh") { + t.Errorf("second install command does not invoke install-agent-wrappers.sh: %q", second) + } + if s.Commands.Install[1].User != "0" { + t.Errorf("second install user = %q, want %q (root)", s.Commands.Install[1].User, "0") + } +} + +func TestSpecYAML_InitFilesSetShimPath(t *testing.T) { + s := loadSpec(t) + var foundProfile, foundEnv bool + for _, f := range s.Commands.InitFiles { + if f.Path == "/etc/profile.d/agentsh.sh" { + foundProfile = true + if !strings.Contains(f.Content, "/usr/lib/agentsh/shims") { + t.Errorf("profile.d entry does not export shim PATH: %q", f.Content) + } + } + if f.Path == "/etc/environment.d/10-agentsh.conf" { + foundEnv = true + if !strings.Contains(f.Content, "/usr/lib/agentsh/shims") { + t.Errorf("environment.d entry does not include shim PATH: %q", f.Content) + } + } + } + if !foundProfile { + t.Error("initFiles missing /etc/profile.d/agentsh.sh entry") + } + if !foundEnv { + t.Error("initFiles missing /etc/environment.d/10-agentsh.conf entry") + } +} + +func TestSpecYAML_StartupInvokesBootstrap(t *testing.T) { + s := loadSpec(t) + if len(s.Commands.Startup) != 1 { + t.Fatalf("expected exactly one startup command, got %d", len(s.Commands.Startup)) + } + cmd := s.Commands.Startup[0] + if len(cmd.Command) == 0 || cmd.Command[0] != "/usr/bin/agentsh-sbx-bootstrap" { + t.Errorf("startup command = %v, want first element /usr/bin/agentsh-sbx-bootstrap", cmd.Command) + } + if !cmd.Background { + t.Error("startup command must be background:true") + } +} + +func TestKitFiles_SkillExists(t *testing.T) { + if _, err := os.Stat(filepath.Join("files", "workspace", ".claude", "skills", "agentsh", "SKILL.md")); err != nil { + t.Errorf("SKILL.md missing: %v", err) + } +} + 
+func TestKitFiles_OverrideStubExists(t *testing.T) { + if _, err := os.Stat(filepath.Join("files", "home", "agent", ".agentsh", "policy.yaml")); err != nil { + t.Errorf("override stub missing: %v", err) + } +} diff --git a/docker/sbx-kit/tests/coding-agent-smoke.sh b/docker/sbx-kit/tests/coding-agent-smoke.sh new file mode 100755 index 000000000..c04fd48e9 --- /dev/null +++ b/docker/sbx-kit/tests/coding-agent-smoke.sh @@ -0,0 +1,101 @@ +#!/usr/bin/env bash +# Manual smoke test exercised inside a Docker Sandbox that has the AgentSH +# mixin kit installed. Run via: +# sbx exec bash /workspace/.claude/skills/agentsh/coding-agent-smoke.sh +# +# Or copy this file into the sandbox manually and run it as the agent user. +# +# Each check prints PASS / FAIL. Exits non-zero on any FAIL. + +set -u + +pass=0 +fail=0 + +assert() { + local label="$1" + local got="$2" + local want="$3" + if [ "$got" = "$want" ]; then + echo "PASS: $label" + pass=$((pass+1)) + else + printf 'FAIL: %s (got=%q, want=%q)\n' "$label" "$got" "$want" >&2 + fail=$((fail+1)) + fi +} + +assert_contains() { + local label="$1" + local got="$2" + local want="$3" + if printf '%s' "$got" | grep -q -- "$want"; then + echo "PASS: $label" + pass=$((pass+1)) + else + echo "FAIL: $label (output did not contain $want)" + echo "----- got: -----" + printf '%s\n' "$got" + echo "----------------" + fail=$((fail+1)) + fi +} + +# Check 1: tier file says shim +got=$(cat /run/agentsh/tier 2>/dev/null || echo missing) +assert "tier file = shim" "$got" "shim" + +# Check 2: curl resolves under the shim dir +resolved=$(command -v curl) +assert_contains "curl resolves under shim dir" "$resolved" "/usr/lib/agentsh/shims" + +# Check 3: cat ~/.ssh/id_rsa is denied (no such file is fine; we expect either ENOENT or EACCES via deny) +mkdir -p "$HOME/.ssh" +printf 'fake-key\n' > "$HOME/.ssh/id_rsa.smoke" +out=$(cat "$HOME/.ssh/id_rsa.smoke" 2>&1) && rc=0 || rc=$? 
+rm -f "$HOME/.ssh/id_rsa.smoke" +if [ "$rc" -ne 0 ]; then + echo "PASS: cat ~/.ssh/id_rsa.smoke denied (rc=$rc)" + pass=$((pass+1)) +else + echo "FAIL: cat ~/.ssh/id_rsa.smoke succeeded — deny rule did not fire" + echo "----- got: -----" + echo "$out" + echo "----------------" + fail=$((fail+1)) +fi + +# Check 4: sudo is denied +out=$(sudo whoami 2>&1) && rc=0 || rc=$? +if [ "$rc" -ne 0 ]; then + echo "PASS: sudo denied (rc=$rc)" + pass=$((pass+1)) +else + echo "FAIL: sudo succeeded — deny rule did not fire" + echo "----- got: -----" + echo "$out" + echo "----------------" + fail=$((fail+1)) +fi + +# Check 5: soft-delete on /workspace +mkdir -p /workspace +echo "$$" > /workspace/smoke.tmp +rm /workspace/smoke.tmp 2>/dev/null || true +if [ -f /workspace/smoke.tmp ]; then + echo "FAIL: /workspace/smoke.tmp still present after rm" + fail=$((fail+1)) +else + # Look for it in the trash directory + if find /var/lib/agentsh/trash -name smoke.tmp 2>/dev/null | grep -q smoke.tmp; then + echo "PASS: soft-delete recoverable" + pass=$((pass+1)) + else + echo "FAIL: soft-delete trash entry not found" + fail=$((fail+1)) + fi +fi + +echo +echo "summary: $pass pass, $fail fail" +exit $([ "$fail" -eq 0 ] && echo 0 || echo 1) diff --git a/docker/sbx-kit/tests/run-e2e.sh b/docker/sbx-kit/tests/run-e2e.sh new file mode 100755 index 000000000..c23b877d8 --- /dev/null +++ b/docker/sbx-kit/tests/run-e2e.sh @@ -0,0 +1,434 @@ +#!/usr/bin/env bash +# E2E test for the AgentSH Docker Sandboxes mixin kit. +# +# Verifies kit mechanics against a real Docker Sandboxes agent template +# (docker/sandbox-templates:shell-docker) without requiring `sbx` itself. +# Builds the binaries on the host, mounts them into the agent-template +# container, simulates what `sbx run --kit` would do (install layout +# + initFiles + startup), and runs verification checks. +# +# What this proves works: +# 1. 
The bootstrap binary merges /usr/share/agentsh/coding-agent.template.yaml +# with /home/agent/.agentsh/policy.yaml into /etc/agentsh/policies/default.yaml. +# 2. The merged file contains both baked rules AND user-override rules +# (replace-by-name + append semantics, exercised end-to-end). +# 3. The PATH wiring from /etc/profile.d/agentsh.sh resolves `command -v curl` +# under /usr/lib/agentsh/shims/. +# 4. /run/agentsh/tier is written with the active tier name. +# +# Out of scope here (still gated on a real `sbx run` + tagged release): +# - In-sandbox enforcement of file/command/signal rules (the agentsh server's +# denial paths). The bootstrap is fail-open on a missing or unstartable +# daemon, so this test side-steps starting the daemon and instead asserts +# the bootstrap's deterministic outputs. +# - The `install` step actually downloading install.sh — that needs a tagged +# release. See docker/sbx-kit/README.md for the manual `sbx run` recipe. +# +# Usage: +# docker/sbx-kit/tests/run-e2e.sh +# make sbx-e2e +# +# Exit codes: +# 0 — all checks passed +# 1 — host prerequisite missing (docker, go) +# 2 — build failed +# 3 — one or more verification checks failed + +set -euo pipefail + +# Resolve repo root regardless of caller's CWD. +HERE="$(cd "$(dirname "$0")" && pwd)" +REPO="$(cd "$HERE/../../.." && pwd)" +cd "$REPO" + +IMAGE="docker/sandbox-templates:shell-docker" +CONTAINER="agentsh-sbx-e2e-$$" +STAGE="$(mktemp -d -t agentsh-sbx-e2e.XXXXXX)" + +PASS=0 +FAIL=0 +SKIP=0 + +log() { printf '%s\n' "$*"; } +pass() { printf 'PASS: %s\n' "$*"; PASS=$((PASS + 1)); } +fail() { printf 'FAIL: %s\n' "$*" >&2; FAIL=$((FAIL + 1)); } +skip() { printf 'SKIP: %s\n' "$*"; SKIP=$((SKIP + 1)); } + +cleanup() { + docker rm -f "$CONTAINER" >/dev/null 2>&1 || true + # Remove any per-agent containers from Section 8 (best-effort). 
+ docker ps -aq --filter "name=agentsh-sbx-e2e-" 2>/dev/null \ + | xargs -r docker rm -f >/dev/null 2>&1 || true + rm -rf "$STAGE" +} +trap cleanup EXIT + +# --------------------------------------------------------------------------- +# 1. Host prerequisites +# --------------------------------------------------------------------------- + +command -v docker >/dev/null 2>&1 || { log "docker not found"; exit 1; } +command -v go >/dev/null 2>&1 || { log "go not found"; exit 1; } + +# --------------------------------------------------------------------------- +# 2. Build the binaries we'll mount into the container +# --------------------------------------------------------------------------- +# Build with GOOS=linux GOARCH=amd64 so the binary runs in the linux/amd64 +# sandbox image regardless of what we're building on. CGO_ENABLED=0 keeps +# the binary statically linked — we don't need libseccomp because this test +# does not start the agentsh server. + +log "Building binaries for linux/amd64 (CGO_ENABLED=0)..." +mkdir -p "$STAGE/bin" +GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o "$STAGE/bin/agentsh-shell-shim" ./cmd/agentsh-shell-shim +GOOS=linux GOARCH=amd64 CGO_ENABLED=0 go build -o "$STAGE/bin/agentsh-sbx-bootstrap" ./cmd/agentsh-sbx-bootstrap + +# Empty stub for /usr/bin/agentsh so the bootstrap's spawnDaemon doesn't +# exec a missing binary. The bootstrap is fail-open on socket-wait, so a +# no-op server is acceptable for the mechanics test. +cat >"$STAGE/bin/agentsh" <<'STUB' +#!/bin/sh +# E2E test stub: the agentsh server is not exercised in this test. +exec sleep 600 +STUB +chmod +x "$STAGE/bin/agentsh" + +# --------------------------------------------------------------------------- +# 3. Stage the user override that we'll bind-mount into the container. +# Choose rules that exercise BOTH replace-by-name AND append semantics +# so the merge step is genuinely tested, not just executed. 
+# --------------------------------------------------------------------------- + +mkdir -p "$STAGE/home-overrides" +cat >"$STAGE/home-overrides/policy.yaml" <<'YAML' +version: 1 +name: e2e-overrides +file_rules: + # New rule, must APPEND after the baked set. + - name: e2e-allow-data + paths: ["/data/**"] + operations: ["*"] + decision: allow + + # Same name as a baked rule — must REPLACE the baked allow-tmp in place. + - name: allow-tmp + paths: ["/tmp/**", "/var/tmp/**", "/srv/scratch/**"] + operations: ["*"] + decision: allow +YAML + +# --------------------------------------------------------------------------- +# 4. Start the container with the kit's pieces bind-mounted in. +# --user 0 is required because we touch /usr, /etc, /run, /var/log. +# --------------------------------------------------------------------------- + +log "Starting container ($IMAGE)..." +docker run -d --rm --name "$CONTAINER" --user 0 \ + -v "$STAGE/bin:/sbx-e2e/bin:ro" \ + -v "$REPO/configs/policies/coding-agent.yaml:/sbx-e2e/coding-agent.yaml:ro" \ + -v "$REPO/packaging/config.yaml:/sbx-e2e/server-config.yaml:ro" \ + -v "$REPO/docker/sbx-kit/files:/sbx-e2e/kit-files:ro" \ + -v "$STAGE/home-overrides/policy.yaml:/sbx-e2e/user-override.yaml:ro" \ + -v "$REPO/packaging/agent-wrap.sh:/sbx-e2e/agent-wrap:ro" \ + -v "$REPO/packaging/install-agent-wrappers.sh:/sbx-e2e/install-agent-wrappers:ro" \ + "$IMAGE" \ + sleep 600 >/dev/null + +# Helper: run a command inside the container as root. +in_container() { + docker exec --user 0 "$CONTAINER" /bin/bash -c "$1" +} + +# --------------------------------------------------------------------------- +# 5. Simulate what `sbx run --kit ./docker/sbx-kit/` would do at install +# time: copy binaries to /usr/bin, the policy template to /usr/share, +# the server config to /etc, set up shim symlinks, drop the override +# fragment into the agent's home, and write the kit's initFiles. 
# ---------------------------------------------------------------------------

log "Installing kit payload inside the container..."
# NOTE: the whole payload script is passed as ONE single-quoted argument, so
# it must not contain a single quote anywhere (comments included).
in_container '
set -e

# Binaries
install -m 0755 /sbx-e2e/bin/agentsh-shell-shim    /usr/bin/agentsh-shell-shim
install -m 0755 /sbx-e2e/bin/agentsh-sbx-bootstrap /usr/bin/agentsh-sbx-bootstrap
install -m 0755 /sbx-e2e/bin/agentsh               /usr/bin/agentsh

# Policy template (read-only system path)
install -D -m 0644 /sbx-e2e/coding-agent.yaml /usr/share/agentsh/coding-agent.template.yaml

# Server config
install -D -m 0644 /sbx-e2e/server-config.yaml /etc/agentsh/config.yaml
mkdir -p /etc/agentsh/policies

# Shim directory + symlinks (the .goreleaser.yml ships these on real packages;
# we recreate them here from the canonical list).
mkdir -p /usr/lib/agentsh/shims
for cmd in bash sh curl wget pip pip3 npm node git python python3 rm; do
  ln -sf /usr/bin/agentsh-shell-shim /usr/lib/agentsh/shims/$cmd
done

# Kit `files/` tree — the same content `sbx run --kit` would drop.
install -D -m 0644 \
  /sbx-e2e/kit-files/workspace/.claude/skills/agentsh/SKILL.md \
  /workspace/.claude/skills/agentsh/SKILL.md
install -D -m 0644 \
  /sbx-e2e/kit-files/home/agent/.agentsh/policy.yaml \
  /home/agent/.agentsh/policy.yaml
# Replace the empty stub with the test override so the merge step has something
# meaningful to merge.
install -m 0644 /sbx-e2e/user-override.yaml /home/agent/.agentsh/policy.yaml

# Kit `initFiles` — contents mirror commands.initFiles in spec.yaml.
# \$PATH is escaped so the literal text "$PATH" lands in the profile script
# instead of being expanded while the file is written.
mkdir -p /etc/profile.d /etc/environment.d
cat >/etc/profile.d/agentsh.sh <<EOF
export PATH=/usr/lib/agentsh/shims:\$PATH
EOF
cat >/etc/environment.d/10-agentsh.conf <<EOF
PATH=/usr/lib/agentsh/shims:/usr/local/bin:/usr/bin:/bin
EOF
'

# ---------------------------------------------------------------------------
# 6. Run the bootstrap (the kit's `startup` command).
#    `|| true` keeps a non-zero bootstrap exit from aborting the suite under
#    `set -e`; the checks below judge the bootstrap by its outputs, and the
#    captured log is printed when a check fails.
#    NOTE(review): this invocation was reconstructed from README step 4
#    ("Runs /usr/bin/agentsh-sbx-bootstrap") — confirm whether the original
#    passed extra flags or environment.
# ---------------------------------------------------------------------------

log "Running agentsh-sbx-bootstrap..."
in_container '/usr/bin/agentsh-sbx-bootstrap' >"$STAGE/bootstrap.log" 2>&1 || true

# ---------------------------------------------------------------------------
# 7. Checks
# ---------------------------------------------------------------------------

log
log "Verifying kit mechanics:"

# Check 1: /run/agentsh/tier exists and says "shim".
+tier=$(in_container 'cat /run/agentsh/tier 2>/dev/null || echo MISSING') +if [ "$tier" = "shim" ]; then + pass "tier file = shim" +else + fail "tier file = '$tier' (want 'shim')" + log "---- bootstrap log ----" + cat "$STAGE/bootstrap.log" + log "-----------------------" +fi + +# Check 2: PATH wiring resolves curl under the shim dir. +# We use `bash -lc` so /etc/profile.d/agentsh.sh is sourced — that's how the +# kit's initFiles delivers the shim PATH in production. +resolved=$(in_container "bash -lc 'command -v curl'" || echo NONE) +if printf '%s' "$resolved" | grep -q '^/usr/lib/agentsh/shims/curl$'; then + pass "curl resolves under /usr/lib/agentsh/shims/" +else + fail "curl resolved to '$resolved' (want /usr/lib/agentsh/shims/curl)" +fi + +# Check 3: /etc/agentsh/policies/default.yaml is the merged output. +# - Contains a baked rule name (proves the template was loaded). +# - Contains the override's appended rule (proves append-by-name worked). +# - The replaced rule's overlay paths win (proves replace-by-name worked). +merged=$(in_container 'cat /etc/agentsh/policies/default.yaml') +if printf '%s' "$merged" | grep -q 'deny-credential-paths'; then + pass "merged policy contains baked rule 'deny-credential-paths'" +else + fail "merged policy missing baked rule 'deny-credential-paths'" +fi + +if printf '%s' "$merged" | grep -q 'e2e-allow-data'; then + pass "merged policy contains appended override rule 'e2e-allow-data'" +else + fail "merged policy missing appended override rule 'e2e-allow-data'" +fi + +# The baked allow-tmp has /tmp/** and /var/tmp/**. The override added +# /srv/scratch/**. After merge, the override's paths win (replace-by-name). +if printf '%s' "$merged" | grep -q '/srv/scratch'; then + pass "merged policy contains override paths for 'allow-tmp' (replace-by-name)" +else + fail "merged policy missing /srv/scratch/** from override 'allow-tmp'" +fi + +# Check 4: Self-teaching artifacts landed where the kit's files/ tree expects. 
+if in_container 'test -f /workspace/.claude/skills/agentsh/SKILL.md'; then + pass "SKILL.md present at /workspace/.claude/skills/agentsh/" +else + fail "SKILL.md missing from /workspace/.claude/skills/agentsh/" +fi + +if in_container 'test -f /home/agent/.agentsh/policy.yaml'; then + pass "user override stub present at /home/agent/.agentsh/policy.yaml" +else + fail "user override stub missing from /home/agent/.agentsh/policy.yaml" +fi + +# --------------------------------------------------------------------------- +# 8. Real-agent wrapper engagement check against docker/sandbox-templates:*. +# +# This replaces the stub-based check that faked both agentsh and the agent. +# The v1 design bug: the installer used `command -v ` from a login +# shell, which resolves to /usr/local/share/npm-global/bin/ (that +# directory appears before /usr/local/bin in every npm-shipped template's +# PATH). This test pins the expected path and asserts the move-aside layout +# matches the real image — for each publicly available agent template we +# can pull. +# +# We use a stub agentsh (not a real CGO+libseccomp build) because the +# wrap-engagement assertion is purely structural: we only need to confirm +# the wrapper invokes `agentsh wrap -- ` and that the +# real agent binary still runs (stub execs through). No enforcement kernel +# machinery is exercised here, so libseccomp-dev is not required, keeping +# this check CI-friendly. +# --------------------------------------------------------------------------- + +log +log "Section 8: real-agent wrapper engagement (per-agent template):" + +# Shared stub agentsh used by every agent run. +cat >"$STAGE/bin/agentsh-stub" <<'STUB' +#!/bin/sh +# E2E stub: record invocation and exec through to the real binary. +echo "AGENTSH-MARKER: $*" >&2 +# Strip "wrap --" prefix: the real binary is the third positional arg. +shift # past "wrap" +shift # past "--" +exec "$@" +STUB +chmod +x "$STAGE/bin/agentsh-stub" + +# Per-agent check. 
# Args: $1=image tag (suffix on docker/sandbox-templates:),
#       $2=agent CLI name (the binary name `command -v` should find).
#
# Asserts that for this image:
#   - The agent resolves to /usr/local/share/npm-global/bin/<agent>.
#   - The installer's symlink lands there and points at agent-wrap.
#   - <agent>.real exists as the moved-aside binary.
#   - Invoking `<agent> --version` through a login shell engages the wrap
#     chain (AGENTSH-MARKER on stderr) and produces non-empty real output
#     (proving the moved-aside binary still executes via the stub).
#
# Records results through pass/fail/skip; always returns 0 itself. A failed
# image pull is a SKIP, not a FAIL, so the suite still runs without registry
# access.
check_real_agent() {
  local image_tag="$1"
  local agent="$2"
  local image="docker/sandbox-templates:${image_tag}"
  local container="agentsh-sbx-e2e-${image_tag}-$$"
  local label="${agent}@${image_tag}"
  local expected_path="/usr/local/share/npm-global/bin/${agent}"

  log
  log "  ${label}:"

  if ! docker pull "$image" >/dev/null 2>&1; then
    skip "${label}: image pull failed — skipping (no network/hub access)"
    return 0
  fi

  docker run -d --name "$container" --user 0 "$image" sleep 600 >/dev/null

  # Root exec prefix, held as an array so every word keeps its own quoting
  # (a command stored in a plain string relies on unquoted word splitting).
  local -a in_c=(docker exec --user 0 "$container" /bin/bash -c)

  # Side-load: stub agentsh, real agent-wrap, real installer.
  docker cp "$STAGE/bin/agentsh-stub" "$container:/usr/bin/agentsh"
  "${in_c[@]}" 'mkdir -p /usr/lib/agentsh'
  docker cp "$REPO/packaging/agent-wrap.sh" "$container:/usr/lib/agentsh/agent-wrap"
  docker cp "$REPO/packaging/install-agent-wrappers.sh" "$container:/usr/lib/agentsh/install-agent-wrappers.sh"
  "${in_c[@]}" 'chmod +x /usr/bin/agentsh /usr/lib/agentsh/agent-wrap /usr/lib/agentsh/install-agent-wrappers.sh'

  # Tier file (wrapper requires /run/agentsh/tier = shim).
  "${in_c[@]}" 'mkdir -p /run/agentsh && echo shim > /run/agentsh/tier'

  # Run the installer from root login shell so it sees the production PATH.
  # Only stderr is captured; a failing installer is tolerated here because
  # the layout checks below report the consequences and print this log.
  "${in_c[@]}" 'bash -lc /usr/lib/agentsh/install-agent-wrappers.sh' \
    2>"$STAGE/${image_tag}-install.log" || true

  # Discover the agent's path (must match the npm-global location).
  local agent_path
  agent_path=$("${in_c[@]}" "bash -lc 'command -v ${agent} 2>/dev/null || echo MISSING'" || echo MISSING)
  local agent_real="${agent_path}.real"

  # Check N.1: discovered path is the expected npm-global location.
  if [ "$agent_path" = "$expected_path" ]; then
    pass "${label}: discovered at expected npm-global path ($agent_path)"
  else
    fail "${label}: path is '$agent_path' (want $expected_path) — layout may have changed"
  fi

  # Check N.2: discovered path is a symlink to agent-wrap.
  local link_target
  link_target=$("${in_c[@]}" "readlink '$agent_path' 2>/dev/null || echo NOTLINK" || echo NOTLINK)
  if [ "$link_target" = "/usr/lib/agentsh/agent-wrap" ]; then
    pass "${label}: symlink points to /usr/lib/agentsh/agent-wrap"
  else
    fail "${label}: not a symlink to agent-wrap (readlink='$link_target')"
    log "----- installer log -----"
    cat "$STAGE/${image_tag}-install.log"
    log "-------------------------"
  fi

  # Check N.3: .real sibling exists and is executable.
  if "${in_c[@]}" "test -x '$agent_real'" 2>/dev/null; then
    pass "${label}: .real sibling exists and is executable ($agent_real)"
  else
    fail "${label}: .real sibling missing or not executable ($agent_real)"
  fi

  # Check N.4: invoking the agent through a login shell engages the wrap.
  # The agent is run twice, as the unprivileged `agent` user, so stdout and
  # stderr can be captured separately.
  local stdout stderr
  stdout=$(docker exec --user agent "$container" bash -lc "${agent} --version 2>/dev/null" 2>/dev/null || true)
  stderr=$(docker exec --user agent "$container" bash -lc "${agent} --version 2>&1 >/dev/null" 2>/dev/null || true)

  local expected_marker="AGENTSH-MARKER: wrap -- ${agent_real} --version"
  if printf '%s' "$stderr" | grep -qF "$expected_marker"; then
    pass "${label}: AGENTSH-MARKER present (wrap chain fired with correct argv)"
  else
    fail "${label}: AGENTSH-MARKER not found (want: '$expected_marker')"
    log "----- ${label} stderr -----"
    printf '%s\n' "$stderr"
    log "---------------------------"
  fi

  # Check N.5: stdout is non-empty and does not contain the marker
  # (the real binary executed via the stub's exec).
  if [ -n "$stdout" ] && ! printf '%s' "$stdout" | grep -q 'AGENTSH-MARKER'; then
    pass "${label}: --version produced real output (real binary ran through stub)"
  else
    fail "${label}: --version stdout empty or contains marker"
    log "----- ${label} stdout -----"
    printf '%s\n' "$stdout"
    log "---------------------------"
  fi

  docker rm -f "$container" >/dev/null 2>&1 || true
}

# Run against every agent template we can pull. Templates we know are public
# at the time of writing: opencode, gemini, codex. (claude template isn't
# published.) Any agent whose image pulls succeeds gets fully verified;
# others SKIP without failing the suite.
check_real_agent opencode opencode
check_real_agent gemini   gemini
check_real_agent codex    codex

# ---------------------------------------------------------------------------
# 9. 
Summary +# --------------------------------------------------------------------------- + +log +log "summary: $PASS pass, $FAIL fail, $SKIP skip" +if [ "$FAIL" -gt 0 ]; then + log + log "---- bootstrap log ----" + cat "$STAGE/bootstrap.log" >&2 + log "-----------------------" + exit 3 +fi +exit 0 diff --git a/docs/policy-reference.md b/docs/policy-reference.md new file mode 100644 index 000000000..5676a4083 --- /dev/null +++ b/docs/policy-reference.md @@ -0,0 +1,88 @@ +# AgentSH policy reference (Docker Sandboxes edition) + +This file ships at `/usr/share/doc/agentsh/policy-reference.md` inside any +Docker Sandbox that has the AgentSH mixin kit installed. It's the canonical +reference the agent's SKILL.md points at when you (or the agent) want to add +or change a rule. + +For the full schema documented inline with examples, see +`/etc/agentsh/policies/default.yaml` — the merged policy the daemon is +currently enforcing. + +## Inspecting the live state + +| Question | Run | +|---|---| +| What enforcement tier is active? | `cat /run/agentsh/tier` (one of `shim`, `none`) | +| What policy is being enforced right now? | `cat /etc/agentsh/policies/default.yaml` | +| What are my overrides on top of the baked policy? | `cat /home/agent/.agentsh/policy.yaml` | +| Is the daemon running? | `pgrep -af 'agentsh server'` | + +## Adding rules — `~/.agentsh/policy.yaml` + +Write a partial policy. The bootstrap merges it on top of the baked +`coding-agent` template on next sandbox start. Rules that share a `name` with +a baked rule replace it; rules with new names append after the baked set. 
+ +```yaml +version: 1 +name: my-overrides + +file_rules: + - name: allow-extra-write-area + paths: ["/data/**"] + operations: [write, create, mkdir, rename] + decision: allow + + - name: allow-workspace-write # overrides the baked rule by name + paths: ["/workspace", "/workspace/**", "/scratch/**"] + operations: [write, create, mkdir, chmod, rename] + decision: allow + +command_rules: + - name: deny-aws-cli + commands: [aws] + decision: deny + message: "aws-cli is not permitted in this sandbox" +``` + +## Rule kinds at a glance + +- `file_rules` — file open/read/write/delete/stat/list, by glob path. Decisions: `allow`, `deny`, `approve`, `audit`, `soft_delete`, `redirect`. +- `command_rules` — process exec, by command name + optional argument regex. Decisions: `allow`, `deny`, `approve`, `audit`, `redirect`. +- `signal_rules` — signal sending. Decisions: `allow`, `deny`, `audit`, `approve`, `redirect`, `absorb`. +- `network_rules` — outbound connect by domain / port / CIDR. The Docker Sandbox proxy is the primary outbound-network gate inside a sandbox; AgentSH's network rules are layered on top and apply *before* the proxy. +- `unix_socket_rules` — AF_UNIX socket connect/bind/listen. + +Each rule has `name`, `description`, the kind-specific selectors, `decision`, and an optional `message` (Go template; available variables: `.Path`, `.Command`, `.Args`, `.Decision`, `.Signal`, `.PID`). 
+ +## Where things live + +| Path | Owner | Purpose | +|---|---|---| +| `/usr/share/agentsh/coding-agent.template.yaml` | OS package, read-only | Baked-in policy the bootstrap reads | +| `/home/agent/.agentsh/policy.yaml` | You | Override fragment (optional) | +| `/etc/agentsh/policies/default.yaml` | bootstrap (regenerated each start) | What the daemon enforces | +| `/etc/agentsh/config.yaml` | OS package | Daemon server config | +| `/run/agentsh/tier` | bootstrap | Active enforcement tier | +| `/run/agentsh/agentsh.sock` | daemon | Daemon control socket | +| `/var/log/agentsh/daemon.log` | daemon | Daemon stdout+stderr | +| `/var/log/agentsh/bootstrap.log` | bootstrap | Startup banner + tier probe result | +| `/usr/lib/agentsh/agent-wrap` | OS package, read-only | Shared wrapper script for agent binaries | +| `<agent-bin>` | Kit install step | Symlink to agent-wrap at the agent's original PATH location; the real binary is moved aside to `<agent-bin>.real` (e.g. `/usr/local/share/npm-global/bin/opencode` → symlink, `…/opencode.real` → moved-aside binary) | + +## Decision semantics quick reference + +- `allow` — operation proceeds. +- `audit` — operation proceeds and an audit event is emitted. +- `deny` — operation refused; the agent gets EACCES (or equivalent). +- `approve` — operation blocks until a human approves out-of-band. +- `soft_delete` — for file delete/rmdir operations only: the path is moved to `/var/lib/agentsh/trash/` instead of being removed. Recoverable. +- `redirect` — for `command_rules` and `connect_redirects`: the operation is rewritten to a different command or destination. +- `absorb` — for `signal_rules` only: the signal is silently consumed and never delivered to the target. + +## Reloading + +In v1, the bootstrap re-runs only at sandbox start. To pick up a new +`~/.agentsh/policy.yaml`, restart the sandbox via Docker Sandboxes. v1.1 may +add an in-place reload. 
diff --git a/docs/superpowers/plans/2026-05-10-db-plan-04c-simple-query-events.md b/docs/superpowers/plans/2026-05-10-db-plan-04c-simple-query-events.md new file mode 100644 index 000000000..85e57b81d --- /dev/null +++ b/docs/superpowers/plans/2026-05-10-db-plan-04c-simple-query-events.md @@ -0,0 +1,3246 @@ +# db-access Plan 04c — Simple Query + DBEvent Emission Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Continue the per-connection driver past the first upstream `ReadyForQuery`: read client `'Q'` frames, classify (Plan 03) + evaluate (Plan 02) per statement, forward-or-synthesize-deny, and emit one `db_statement` event per `ClassifiedStatement`. Adds the `Decision`/`Result`/`TxContext`/`Predicates`/`TLS` sub-structs to `events.DBEvent`, a `Normalize` method on the classifier `Parser`, statement source spans on `effects.ClassifiedStatement`, hot-swappable policy, and a real-pgx spine integration test. + +**Architecture:** Five new files under `internal/db/proxy/postgres/`: `simplequery.go` (loop), `upstreamread.go` (per-frame demux + counter accumulation), `deny.go` (synth + SQLSTATE picker), `eventbuilder.go` (pure event builder with redaction + digest), `classifiers.go` (per-dialect Parser map). Modifications to `server.go` (Config, atomic policy, dialect map, SetPolicy), `proxyconn.go` (connState extensions), `handshake.go` (call simpleQueryLoop after forwardAuth), `authforward.go` (write `'Z'` status byte into connState before returning). Plan 03's `Parser` interface gains `Normalize`; both libpg_query and wasm backends wire it. `effects.ClassifiedStatement` gains `SourceStart`/`SourceEnd`. `events.DBEvent` gets the five sub-structs. 
+ +**Tech Stack:** Go (`//go:build linux` for all new proxy files; events/effects extensions are tag-free), `github.com/jackc/pgx/v5/pgproto3` (already a dep), `github.com/jackc/pgx/v5` added as a test-only dep for the spine integration test, `github.com/pganalyze/pg_query_go/v6` and `github.com/wasilibs/go-pgquery` (both already deps; we call their `Normalize`). + +**Settled in brainstorming (2026-05-10):** +1. Single-driver half-duplex loop. After `forwardAuth` returns on the first upstream `'Z'`, the same goroutine enters `simpleQueryLoop`. On allow-forward, `forwardUpstreamUntilRFQ` reads one upstream frame at a time until trailing `'Z'`. Async LISTEN/NOTIFY pushes outside a Q…Z round-trip are documented limitations and deferred to Plan 05. +2. `statement_digest = sha256:` + hex(SHA-256(`Normalize(slice)`)) for every redaction tier. Digest is invariant under redaction so events join across deployments with different `LogStatements` settings. Documented caveat: libpg_query and pure-Go `Normalize` outputs may differ; digests are stable *within an implementation*, not across. +3. `Normalize` lives on the classifier `Parser`, not in the proxy. libpg_query backend → `pg_query.Normalize`. wasm/wasilibs backend → `pgquery_wasm.Normalize`. On `unknown` classification we compute the digest off the verbatim trimmed SQL with a documented note. +4. Per-frame demux for the upstream response stream populates `result.rows_returned`, `result.rows_affected`, `result.bytes_in/out`, `result.latency_ms`, `result.error_code`. CommandComplete tag parsing maps the *i*-th frame to the *i*-th statement. +5. Full §8 DBEvent schema lands in 04c with partial population: `tx_started_at` and `deny_action: "rollback_injected"` defer to Plan 05's state machine. +6. Per-dialect classifier map built in `Server.New()` from `cfg.Services[*].Dialect`. Same dialect shares one `Parser`. Unexported `classifierForTest` hook for tests. +7. 
`Server.SetPolicy(*policy.RuleSet)` via `atomic.Pointer[policy.RuleSet]`. Each statement reads the snapshot once at evaluate time. +8. `effects.ClassifiedStatement.SourceStart` / `SourceEnd` added in 04c (scope creep onto Plan 03 accepted). +9. Frame budget enforced at `handleQuery` entry against `len(q.String)`. Mitigation-grade rather than streaming-framer ceiling. Documented. +10. Non-`'Q'` / non-`'X'` frame post-handshake → synthetic `ErrorResponse(0A000, …)` + close + `EXTENDED_QUERY_NOT_SUPPORTED` lifecycle event (or `FUNCTION_CALL_PROTOCOL_DENIED` for `'F'`). +11. `command_id = ":"` for per-stmt events in multi-stmt batches (no new wire schema field). + +**Cross-references:** +- Design: `docs/superpowers/specs/2026-05-10-db-plan-04c-simple-query-events-design.md` +- Macro design: `docs/superpowers/specs/2026-05-10-db-plan-04-pg-proxy-skeleton-design.md` +- Roadmap: `docs/superpowers/specs/2026-05-08-db-access-phase-1-roadmap-design.md` §3 Plan 04c +- Spec: `docs/agentsh-db-access-spec.md` v0.8 §7.1, §7.7, §8, §10.2, §10.3, §14.1, §14.3, §14.4 +- Predecessor plan: `docs/superpowers/plans/2026-05-10-db-plan-04b2-upstream-passthrough.md` + +--- + +## File Structure + +**Created:** + +- `internal/db/proxy/postgres/simplequery.go` — `simpleQueryLoop`, `handleQuery`, `handleUnsupportedFrame`, `MaxQueryBytes` enforcement. +- `internal/db/proxy/postgres/simplequery_test.go` — Q/X/unsupported dispatch, MaxQueryBytes cap, multi-stmt allow/deny/anyDeny matrix, approve→deny stub, RFQ-byte gating. +- `internal/db/proxy/postgres/upstreamread.go` — `forwardUpstreamUntilRFQ`, per-frame demux, CommandComplete tag parsing, counter accumulation. +- `internal/db/proxy/postgres/upstreamread_test.go` — frame-shape tests, tag-parser tests, mid-batch ErrorResponse propagation, RFQ status-byte updates. +- `internal/db/proxy/postgres/deny.go` — `synthErrorAndRFQ`, `synthErrorOnly`, `pickDenySynth`, SQLSTATE constants. 
+- `internal/db/proxy/postgres/deny_test.go` — synth output round-trip via `pgproto3.Frontend`, SQLSTATE selection by RuleKind, deny_message template substitution. +- `internal/db/proxy/postgres/eventbuilder.go` — pure `buildStatementEvent`; redaction tier rendering; `statement_digest`; `denied_by_sibling` tagging; `command_id` shape. +- `internal/db/proxy/postgres/eventbuilder_test.go` — three-tier render table, digest stability across tiers, multi-stmt EventID uniqueness, denied_by_sibling shape. +- `internal/db/proxy/postgres/classifiers.go` — per-dialect Parser map construction; `classifierFor`. +- `internal/db/proxy/postgres/classifiers_test.go` — dialect→Parser construction, shared-instance assertion, unknown-dialect error, test-hook override. +- `internal/db/proxy/postgres/spine_test.go` (extends existing file) — three real-pgx subtests against the existing `testupstream_test.go` fake upstream. +- `internal/db/classify/postgres/normalize.go` (Linux+CGO) — `(*cgoParser).Normalize` delegating to `pg_query.Normalize`. +- `internal/db/classify/postgres/normalize_wasm.go` (non-Linux or non-CGO) — `(*wasmParser).Normalize` delegating to `pgquery_wasm.Normalize`. +- `internal/db/classify/postgres/parser_normalize_test.go` — backend-agnostic Normalize tests via curated SQL. + +**Modified:** + +- `internal/db/effects/statement.go` — `ClassifiedStatement` gains `SourceStart int32`, `SourceEnd int32` (byte offsets into the input SQL; zero-valued when parser cannot supply). +- `internal/db/effects/statement_test.go` — coverage for new fields. +- `internal/db/classify/postgres/parser.go` — `Parser` interface gains `Normalize(sql string) (string, error)`; `classifyWithBackend` populates `SourceStart` / `SourceEnd` from `RawStmt.StmtLocation` + `StmtLen`. +- `internal/db/classify/postgres/ast_walk.go` — `classifyRawStmt` accepts the location/length and propagates into the returned `ClassifiedStatement`. 
+- `internal/db/classify/postgres/corpus_test.go` — golden-corpus regen for the new struct fields (zero-valued for unknown-stmt cases). +- `internal/db/events/event.go` — `DBEvent` gains `TLS`, `Decision`, `Result`, `TxContext`, `Predicates` sub-structs; new types defined in the same file. +- `internal/db/events/event_test.go` — JSON round-trip for the extended schema, including `null` propagation for `*int64` row counters. +- `internal/db/proxy/postgres/server.go` — `Config` gains `MaxQueryBytes int` and `classifierForTest func(dialect string) classify_pg.Parser`; `Server` gains `policyPtr atomic.Pointer[policy.RuleSet]` and `classifiers map[string]classify_pg.Parser`; `New()` validates dialects, builds the classifier map, applies the `MaxQueryBytes` default, stores `cfg.Policy` in the atomic pointer; `SetPolicy` / `policy()` / `classifierFor` helpers added. +- `internal/db/proxy/postgres/server_test.go` — `MaxQueryBytes` default; `SetPolicy` swap visibility; unknown-dialect `New()` rejection. +- `internal/db/proxy/postgres/proxyconn.go` — `connState` gains `lastUpstreamRFQ byte`, `redactionTier policy.RedactionTier`, `tlsMode string`; emit helpers for the new lifecycle events (`emitFrameTooLarge`, `emitUnsupportedFrame`). +- `internal/db/proxy/postgres/authforward.go` — `forwardAuth` records the observed `'Z'` status byte into `pc.state.lastUpstreamRFQ` before returning. +- `internal/db/proxy/postgres/handshake.go` — `dialUpstreamAndForward` calls `pc.simpleQueryLoop(ctx)` instead of returning `nil` after `forwardAuth` returns successfully; seeds `pc.state.redactionTier` and `pc.state.tlsMode`. +- `internal/db/policy/decode.go` — Decode appends a `Warning` with `Code: "APPROVE_NOT_YET_SUPPORTED"` for every rule with `decision: approve` when `Unavoidability != off`. +- `internal/db/policy/decode_test.go` — coverage for the warning emission. 
+- `go.mod` / `go.sum` — `github.com/jackc/pgx/v5` promoted from `// indirect` to a top-level test-only dep (imported only from `_test.go` files). + +**Out of scope (deferred):** + +- Extended Query / `Parse` / `Bind` / `Describe` / `Execute` / `Sync` / `Flush` / `Close`, SQL-level prepared cache, COPY data-frame handling, FunctionCall semantics, full §14 deny modes (`rollback_then_continue`), `tx_started_at`, `approve` runtime, GSSENC opt-in, async LISTEN/NOTIFY delivery — Plan 05. +- BackendKeyData mapping, cancel mapping — Plan 06. +- SO_PEERCRED → SessionID, out-of-process proxy, unavoidability bundle, real-PG testcontainer suite — Plan 07. + +--- + +## Task 1: Add `SourceStart` / `SourceEnd` to `effects.ClassifiedStatement` + +**Why:** Eventbuilder needs per-statement byte spans to slice the original `Q` body under `RedactionFull`. libpg_query exposes `RawStmt.StmtLocation` and `StmtLen` already; we surface them up. Zero values are legal for parsers that cannot supply them (`unknown` statements, edge fallback paths). 
+ +**Files:** +- Modify: `internal/db/effects/statement.go` +- Modify: `internal/db/effects/statement_test.go` + +- [ ] **Step 1: Write the failing test for the new fields** + +Append to `internal/db/effects/statement_test.go`: + +```go +func TestClassifiedStatement_SourceSpan_RoundTrip(t *testing.T) { + in := ClassifiedStatement{ + Effects: []Effect{{Group: GroupRead, Resolution: ResolutionQualified}}, + RawVerb: "SELECT", + SourceStart: 7, + SourceEnd: 23, + } + bs, err := json.Marshal(in) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + var out ClassifiedStatement + if err := json.Unmarshal(bs, &out); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if out.SourceStart != in.SourceStart || out.SourceEnd != in.SourceEnd { + t.Fatalf("span lost: got (%d,%d) want (%d,%d)", + out.SourceStart, out.SourceEnd, in.SourceStart, in.SourceEnd) + } +} + +func TestClassifiedStatement_SourceSpan_ZeroOmitted(t *testing.T) { + in := ClassifiedStatement{ + Effects: []Effect{{Group: GroupRead, Resolution: ResolutionQualified}}, + RawVerb: "SELECT", + } + bs, err := json.Marshal(in) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if strings.Contains(string(bs), "source_start") || strings.Contains(string(bs), "source_end") { + t.Fatalf("zero span fields must be omitted: %s", bs) + } +} +``` + +If `strings` / `encoding/json` are not imported in that test file, add them. + +- [ ] **Step 2: Run tests to verify they fail to compile** + +Run: `go test ./internal/db/effects/ -run TestClassifiedStatement_SourceSpan -count=1` +Expected: build error referencing `SourceStart` / `SourceEnd`. 
+ +- [ ] **Step 3: Add the fields** + +Modify `internal/db/effects/statement.go`, replacing the `ClassifiedStatement` struct body: + +```go +type ClassifiedStatement struct { + Effects []Effect `json:"effects"` + RawVerb string `json:"raw_verb,omitempty"` + ParserBackend ParserBackend `json:"parser_backend,omitempty"` + Error string `json:"error,omitempty"` + + // SourceStart / SourceEnd are byte offsets into the original SQL input + // (Plan 04c needs these to slice per-stmt text under RedactionFull). Both + // zero when the parser cannot supply them (e.g. unknown-statement path). + SourceStart int32 `json:"source_start,omitempty"` + SourceEnd int32 `json:"source_end,omitempty"` +} +``` + +- [ ] **Step 4: Run tests to confirm they pass** + +Run: `go test ./internal/db/effects/ -run TestClassifiedStatement_SourceSpan -count=1 -v` +Expected: both tests PASS. + +- [ ] **Step 5: Commit** + +```bash +git add internal/db/effects/statement.go internal/db/effects/statement_test.go +git commit -m "db: effects — add SourceStart/SourceEnd to ClassifiedStatement" +``` + +--- + +## Task 2: Populate `SourceStart` / `SourceEnd` from libpg_query / wasm + +**Why:** `RawStmt.StmtLocation` + `StmtLen` exist on the protobuf for both backends. We just need to thread them through `classifyRawStmt` into the returned statement. 
+ +**Files:** +- Modify: `internal/db/classify/postgres/parser.go` +- Modify: `internal/db/classify/postgres/ast_walk.go` +- Modify: `internal/db/classify/postgres/backend_test.go` + +- [ ] **Step 1: Write the failing test for source-span population** + +Append to `internal/db/classify/postgres/backend_test.go`: + +```go +func TestParser_SourceSpan_Single(t *testing.T) { + p := New(DialectPostgres) + sql := "SELECT 1" + got, err := p.Classify(sql, SessionState{}, Options{}) + if err != nil { + t.Fatalf("Classify: %v", err) + } + if len(got) != 1 { + t.Fatalf("len=%d want 1", len(got)) + } + if got[0].SourceStart != 0 { + t.Fatalf("SourceStart=%d want 0", got[0].SourceStart) + } + if got[0].SourceEnd != int32(len(sql)) { + t.Fatalf("SourceEnd=%d want %d", got[0].SourceEnd, len(sql)) + } +} + +func TestParser_SourceSpan_MultiStmt(t *testing.T) { + p := New(DialectPostgres) + sql := "SELECT 1; SELECT 2" + got, err := p.Classify(sql, SessionState{}, Options{}) + if err != nil { + t.Fatalf("Classify: %v", err) + } + if len(got) != 2 { + t.Fatalf("len=%d want 2", len(got)) + } + if string(sql[got[0].SourceStart:got[0].SourceEnd]) != "SELECT 1" { + t.Fatalf("stmt[0] span = %q want %q", + string(sql[got[0].SourceStart:got[0].SourceEnd]), "SELECT 1") + } + if string(sql[got[1].SourceStart:got[1].SourceEnd]) != "SELECT 2" { + t.Fatalf("stmt[1] span = %q want %q", + string(sql[got[1].SourceStart:got[1].SourceEnd]), "SELECT 2") + } +} +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `go test ./internal/db/classify/postgres/ -run TestParser_SourceSpan -count=1` +Expected: FAIL. `TestParser_SourceSpan_Single` passes its `SourceStart` check (the zero value is already 0) but fails with `SourceEnd=0 want 8`; `TestParser_SourceSpan_MultiStmt` fails with `stmt[0] span = "" want "SELECT 1"` — the spans are not populated yet. 
+ +- [ ] **Step 3: Thread `RawStmt.StmtLocation` + `StmtLen` through dispatch** + +Modify `internal/db/classify/postgres/parser.go` `classifyWithBackend` (around the per-RawStmt loop): + +```go + out := make([]effects.ClassifiedStatement, 0, len(res.Stmts)) + for _, raw := range res.Stmts { + cs := classifyRawStmt(dialect, raw, sess, opts, backend) + // pg_query gives StmtLen=0 for a trailing single statement; in that + // case the statement runs from StmtLocation to end-of-input. + start := raw.StmtLocation + length := raw.StmtLen + var end int32 + if length == 0 { + end = int32(len(sql)) + } else { + end = start + length + } + // A statement that follows a ';' is located at the byte right after + // the semicolon, so its span begins with the separating whitespace + // (" SELECT 2" for "SELECT 1; SELECT 2"). Advance start past it. + for start < end && (sql[start] == ' ' || sql[start] == '\t' || sql[start] == '\n' || sql[start] == '\r' || sql[start] == '\f') { + start++ + } + cs.SourceStart = start + cs.SourceEnd = end + out = append(out, cs) + } + return out, nil +``` + +- [ ] **Step 4: Run tests to confirm they pass** + +Run: `go test ./internal/db/classify/postgres/ -run TestParser_SourceSpan -count=1 -v` +Expected: both tests PASS. + +- [ ] **Step 5: Run the full classify/postgres test suite to confirm no regression** + +Run: `go test ./internal/db/classify/postgres/ -count=1` +Expected: all green. + +- [ ] **Step 6: Commit** + +```bash +git add internal/db/classify/postgres/ +git commit -m "db: classify/postgres — populate ClassifiedStatement source spans" +``` + +--- + +## Task 3: Add `Normalize(sql)` to `classify/postgres.Parser` + +**Why:** `statement_digest` and the `parameters_redacted` tier both consume normalized SQL. libpg_query and wasilibs both expose `Normalize`; we wrap them behind the existing build-tag split. 
+ +**Files:** +- Modify: `internal/db/classify/postgres/parser.go` +- Create: `internal/db/classify/postgres/normalize.go` (Linux+CGO) +- Create: `internal/db/classify/postgres/normalize_wasm.go` (non-Linux or non-CGO) +- Create: `internal/db/classify/postgres/parser_normalize_test.go` + +- [ ] **Step 1: Write the failing test** + +Create `internal/db/classify/postgres/parser_normalize_test.go`: + +```go +package postgres + +import ( + "strings" + "testing" +) + +func TestParser_Normalize_Literals(t *testing.T) { + p := New(DialectPostgres) + cases := []struct { + name string + in string + want string + }{ + {"int literal", "SELECT 1", "SELECT $1"}, + {"string literal", "SELECT 'hello'", "SELECT $1"}, + {"two literals", "SELECT 1, 'x'", "SELECT $1, $2"}, + {"identifier preserved", "SELECT a FROM t", "SELECT a FROM t"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, err := p.Normalize(tc.in) + if err != nil { + t.Fatalf("Normalize(%q): %v", tc.in, err) + } + if got != tc.want { + t.Fatalf("Normalize(%q) = %q want %q", tc.in, got, tc.want) + } + }) + } +} + +func TestParser_Normalize_MultiStatement(t *testing.T) { + p := New(DialectPostgres) + got, err := p.Normalize("SELECT 1; SELECT 'x'") + if err != nil { + t.Fatalf("Normalize: %v", err) + } + if !strings.Contains(got, "$1") || !strings.Contains(got, "$2") { + t.Fatalf("Normalize did not redact both literals: %q", got) + } +} + +func TestParser_Normalize_Error(t *testing.T) { + p := New(DialectPostgres) + _, err := p.Normalize("THIS IS NOT SQL ;;;") + if err == nil { + t.Fatalf("Normalize on malformed SQL: want err, got nil") + } +} +``` + +- [ ] **Step 2: Run test to verify it fails to compile** + +Run: `go test ./internal/db/classify/postgres/ -run TestParser_Normalize -count=1` +Expected: build error — `Normalize` is undefined on `Parser`. 
+ +- [ ] **Step 3: Extend the `Parser` interface** + +Modify `internal/db/classify/postgres/parser.go`: + +```go +type Parser interface { + Classify(sql string, sess SessionState, opts Options) ([]effects.ClassifiedStatement, error) + // Normalize returns SQL with all literal values replaced by $N placeholders. + // On parse failure returns the parser error verbatim; callers degrade to + // the verbatim trimmed SQL for digest computation. + Normalize(sql string) (string, error) +} +``` + +- [ ] **Step 4: Implement Normalize on the CGO backend** + +Create `internal/db/classify/postgres/normalize.go`: + +```go +//go:build linux && cgo + +package postgres + +import pg_query "github.com/pganalyze/pg_query_go/v6" + +func (p *cgoParser) Normalize(sql string) (string, error) { + return pg_query.Normalize(sql) +} +``` + +- [ ] **Step 5: Implement Normalize on the wasm backend** + +Create `internal/db/classify/postgres/normalize_wasm.go`: + +```go +//go:build !linux || !cgo + +package postgres + +import pgquery_wasm "github.com/wasilibs/go-pgquery" + +func (p *wasmParser) Normalize(sql string) (string, error) { + return pgquery_wasm.Normalize(sql) +} +``` + +- [ ] **Step 6: Run tests to confirm they pass** + +Run: `go test ./internal/db/classify/postgres/ -run TestParser_Normalize -count=1 -v` +Expected: all PASS. + +- [ ] **Step 7: Cross-compile check** + +Run: `GOOS=windows go build ./...` +Expected: clean build (wasm backend selected; `Normalize` resolves). + +- [ ] **Step 8: Commit** + +```bash +git add internal/db/classify/postgres/parser.go \ + internal/db/classify/postgres/normalize.go \ + internal/db/classify/postgres/normalize_wasm.go \ + internal/db/classify/postgres/parser_normalize_test.go +git commit -m "db: classify/postgres — add Normalize to Parser interface" +``` + +--- + +## Task 4: Extend `events.DBEvent` with §8 sub-structs + +**Why:** Plan 04c emits events with `decision`, `result`, `tx_context`, `predicates`, `tls` sub-objects per spec §8. 
We add the Go types and JSON tags now; downstream tasks populate them. + +**Files:** +- Modify: `internal/db/events/event.go` +- Modify: `internal/db/events/event_test.go` + +- [ ] **Step 1: Write the failing test for the extended schema** + +Append to `internal/db/events/event_test.go`: + +```go +func TestDBEvent_Extended_RoundTrip(t *testing.T) { + rows := int64(7) + in := DBEvent{ + EventID: "01HJ...", + SessionID: "sess-1", + Timestamp: time.Date(2026, 5, 10, 12, 0, 0, 0, time.UTC), + DBService: "appdb", + DBFamily: "postgres", + DBDialect: "postgres", + Effects: []effects.Effect{{Group: effects.GroupRead, Resolution: effects.ResolutionQualified}}, + + TLS: EventTLS{Mode: "terminate_reissue", ClientSNI: "db.example"}, + Decision: EventDecision{ + Verb: "allow", + RuleKind: "statement", + RuleName: "app-allow-read", + MatchingEffectIndex: 0, + MatchingEffectGroup: "read", + }, + Result: EventResult{ + RowsReturned: &rows, + BytesIn: 9, + BytesOut: 42, + LatencyMs: 3, + }, + TxContext: EventTxContext{InTransaction: false, DenyAction: "none"}, + Predicates: EventPredicates{HasFilter: true}, + } + bs, err := json.Marshal(in) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + var out DBEvent + if err := json.Unmarshal(bs, &out); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if out.Decision.Verb != "allow" || out.Result.LatencyMs != 3 { + t.Fatalf("round-trip mismatch: %+v", out) + } + if out.Result.RowsReturned == nil || *out.Result.RowsReturned != 7 { + t.Fatalf("rows_returned lost: %+v", out.Result.RowsReturned) + } + if out.Result.RowsAffected != nil { + t.Fatalf("rows_affected must be nil for null in wire form: %+v", + out.Result.RowsAffected) + } +} + +func TestDBEvent_Extended_RowsNull(t *testing.T) { + in := DBEvent{ + EventID: "01HJ...", + Timestamp: time.Now().UTC().Truncate(time.Second), + Result: EventResult{BytesIn: 9, BytesOut: 0, LatencyMs: 0}, + TxContext: EventTxContext{DenyAction: "none"}, + } + bs, err := json.Marshal(in) + if err != nil 
{ + t.Fatalf("Marshal: %v", err) + } + if !strings.Contains(string(bs), `"rows_returned":null`) { + t.Fatalf("rows_returned must serialise as null when nil; got %s", bs) + } +} +``` + +- [ ] **Step 2: Run test to verify it fails to compile** + +Run: `go test ./internal/db/events/ -run TestDBEvent_Extended -count=1` +Expected: build error referencing `EventTLS` / `EventDecision` / `EventResult` / `EventTxContext` / `EventPredicates`. + +- [ ] **Step 3: Add the sub-struct types and extend `DBEvent`** + +Replace the `DBEvent` struct in `internal/db/events/event.go` (and append the new types): + +```go +type DBEvent struct { + EventID string `json:"event_id"` + SessionID string `json:"session_id"` + CommandID string `json:"command_id,omitempty"` + Timestamp time.Time `json:"ts"` + + DBService string `json:"db_service"` + DBFamily string `json:"db_family"` + DBDialect string `json:"db_dialect"` + DBUser string `json:"db_user,omitempty"` + ApplicationName string `json:"application_name,omitempty"` + ClientIdentity string `json:"client_identity,omitempty"` + + Effects []effects.Effect `json:"effects"` + + OperationGroup string `json:"operation_group,omitempty"` + OperationGroupID uint8 `json:"operation_group_id,omitempty"` + OperationSubtype string `json:"operation_subtype,omitempty"` + RawVerb string `json:"raw_verb,omitempty"` + ObjectResolution string `json:"object_resolution,omitempty"` + + StatementDigest string `json:"statement_digest,omitempty"` + StatementText string `json:"statement_text,omitempty"` + StatementRedaction Redaction `json:"statement_redaction"` + + ParserBackend effects.ParserBackend `json:"parser_backend,omitempty"` + + TLS EventTLS `json:"tls"` + Decision EventDecision `json:"decision"` + Result EventResult `json:"result"` + TxContext EventTxContext `json:"tx_context"` + Predicates EventPredicates `json:"predicates,omitempty"` +} + +// EventTLS mirrors spec §8 tls{}. UpstreamCertSubject is unpopulated in 04c. 
+type EventTLS struct { + Mode string `json:"mode"` + ClientSNI string `json:"client_sni,omitempty"` + UpstreamCertSubject string `json:"upstream_cert_subject,omitempty"` +} + +// EventDecision mirrors spec §8 decision{}. Verb is one of "allow"|"deny"| +// "approve"|"audit" (approve never emitted live in 04c; the runtime stubs it +// out as deny + APPROVE_NOT_YET_SUPPORTED). +type EventDecision struct { + Verb string `json:"verb"` + RuleKind string `json:"rule_kind"` + RuleName string `json:"rule_name,omitempty"` + MatchingEffectIndex int `json:"matching_effect_index"` + MatchingEffectGroup string `json:"matching_effect_group,omitempty"` + Reason string `json:"reason,omitempty"` + ContributingAuditRules []string `json:"contributing_audit_rules,omitempty"` +} + +// EventResult mirrors spec §8 result{}. RowsReturned / RowsAffected are +// pointers so JSON wire form carries null for "not applicable". +type EventResult struct { + RowsReturned *int64 `json:"rows_returned"` + RowsAffected *int64 `json:"rows_affected"` + BytesIn int64 `json:"bytes_in"` + BytesOut int64 `json:"bytes_out"` + LatencyMs int64 `json:"latency_ms"` + ErrorCode string `json:"error_code,omitempty"` +} + +// EventTxContext mirrors spec §8 tx_context{}. TxStartedAt is nil (and so +// omitted from the wire form) in 04c; Plan 05's state machine populates it. +// It is a pointer because encoding/json's omitempty never omits a struct +// value such as time.Time. DenyAction is one of +// "none"|"connection_terminated"|"rollback_injected" (last value Plan 05). +type EventTxContext struct { + InTransaction bool `json:"in_transaction"` + TxStartedAt *time.Time `json:"tx_started_at,omitempty"` + DenyAction string `json:"deny_action"` +} + +// EventPredicates mirrors spec §8 predicates{}. +type EventPredicates struct { + HasFilter bool `json:"has_filter"` +} +``` + +- [ ] **Step 4: Run tests to confirm they pass** + +Run: `go test ./internal/db/events/ -count=1 -v` +Expected: all PASS, including the two new tests. 
+ +- [ ] **Step 5: Cross-compile check** + +Run: `GOOS=windows go build ./...` +Expected: clean. 
+ +- [ ] **Step 6: Commit** + +```bash +git add internal/db/events/event.go internal/db/events/event_test.go +git commit -m "db: events — extend DBEvent with §8 sub-structs" +``` + +--- + +## Task 5: Add `MaxQueryBytes` + dialect classifier map + atomic policy pointer to `Server` + +**Why:** Three pieces of `Server` plumbing the rest of the plan needs: per-query frame budget, per-service classifier resolution, hot-swappable policy snapshot. Done together so the `Server` struct only churns once. + +**Files:** +- Modify: `internal/db/proxy/postgres/server.go` +- Create: `internal/db/proxy/postgres/classifiers.go` +- Create: `internal/db/proxy/postgres/classifiers_test.go` +- Modify: `internal/db/proxy/postgres/server_test.go` + +- [ ] **Step 1: Write failing tests for the new behavior** + +Append to `internal/db/proxy/postgres/server_test.go`: + +```go +func TestServer_New_AppliesMaxQueryBytesDefault(t *testing.T) { + s := newTestServer(t, withService(testService(t, "appdb", "postgres"))) + if got := s.cfg.MaxQueryBytes; got != 1<<20 { + t.Fatalf("MaxQueryBytes default = %d want %d", got, 1<<20) + } +} + +func TestServer_New_HonorsMaxQueryBytesOverride(t *testing.T) { + s := newTestServer(t, + withService(testService(t, "appdb", "postgres")), + withMaxQueryBytes(4096), + ) + if got := s.cfg.MaxQueryBytes; got != 4096 { + t.Fatalf("MaxQueryBytes = %d want 4096", got) + } +} + +func TestServer_SetPolicy_AtomicSwap(t *testing.T) { + s := newTestServer(t, withService(testService(t, "appdb", "postgres"))) + if got := s.policy(); got != nil { + t.Fatalf("initial policy = %p want nil", got) + } + rs := &policy.RuleSet{} + s.SetPolicy(rs) + if got := s.policy(); got != rs { + t.Fatalf("policy() after SetPolicy = %p want %p", got, rs) + } +} + +func TestServer_New_RejectsUnknownDialect(t *testing.T) { + svc := testService(t, "appdb", "rabbitql") // not a real dialect + _, err := New(Config{ + Unavoidability: service.UnavoidabilityObserve, + Services: []Service{svc}, + 
StateDir: t.TempDir(), + Sink: events.NopSink{}, + }) + if err == nil || !strings.Contains(err.Error(), "rabbitql") { + t.Fatalf("New on unknown dialect: err = %v", err) + } +} +``` + +(`newTestServer`, `withService`, `withMaxQueryBytes`, `testService` are existing helpers in `server_test.go` from 04a/b/b₂ — extend `withMaxQueryBytes` next; the others exist.) + +- [ ] **Step 2: Run the tests to verify they fail** + +Run: `go test ./internal/db/proxy/postgres/ -run "TestServer_New_AppliesMaxQueryBytesDefault|TestServer_New_HonorsMaxQueryBytesOverride|TestServer_SetPolicy_AtomicSwap|TestServer_New_RejectsUnknownDialect" -count=1` +Expected: build errors (missing fields/methods/helpers). + +- [ ] **Step 3: Add the new test helper** + +Append to `internal/db/proxy/postgres/server_test.go`: + +```go +func withMaxQueryBytes(n int) testServerOpt { + return func(c *Config) { c.MaxQueryBytes = n } +} +``` + +- [ ] **Step 4: Extend `Config` + add the atomic pointer + helper methods** + +Modify `internal/db/proxy/postgres/server.go`: + +Add imports `sync/atomic` and the classify package alias (use the existing convention): + +```go +import ( + // ...existing... + "sync/atomic" + + classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres" +) +``` + +Extend `Config`: + +```go +type Config struct { + // ...existing fields unchanged... + + // MaxQueryBytes caps the 'Q' frame body. Default 1 MiB when zero. + // Statements above the cap get a synthetic ErrorResponse(54000) + close. + MaxQueryBytes int + + // classifierForTest, when non-nil, overrides the per-dialect Parser map + // built by New(). Test-only — production callsites must leave this nil. + classifierForTest func(dialect string) classify_pg.Parser +} +``` + +Extend `Server`: + +```go +type Server struct { + // ...existing fields unchanged... 
+ + policyPtr atomic.Pointer[policy.RuleSet] + classifiers map[string]classify_pg.Parser +} +``` + +Add helpers at the bottom of the file: + +```go +// SetPolicy atomically replaces the active rule set. A nil ruleset means +// "implicit deny everywhere" (matches policy.Evaluate(stmt, nil, _)). +func (s *Server) SetPolicy(rs *policy.RuleSet) { s.policyPtr.Store(rs) } + +func (s *Server) policy() *policy.RuleSet { return s.policyPtr.Load() } +``` + +- [ ] **Step 5: Wire the defaults + classifier map + policy seed into `New()`** + +In `New()`, after the existing per-service validation loop and before `return &Server{...}`, add: + +```go + if cfg.MaxQueryBytes == 0 { + cfg.MaxQueryBytes = 1 << 20 + } + + classifiers, err := buildClassifierMap(cfg.Services) + if err != nil { + return nil, err + } +``` + +Replace the final `return &Server{...}` to capture `classifiers` and seed `policyPtr`: + +```go + srv := &Server{ + cfg: cfg, + logger: cfg.Logger, + done: make(chan struct{}), + uidAllowed: func(uid uint32) bool { return uid == uint32(os.Getuid()) }, + classifiers: classifiers, + } + srv.policyPtr.Store(cfg.Policy) + return srv, nil +``` + +Apply the same `MaxQueryBytes` default and `policyPtr.Store(cfg.Policy)` to the sentinel-server path so a Plan-05+ caller that flips to `observe` mid-test still sees the seeded policy. + +- [ ] **Step 6: Create `classifiers.go`** + +Create `internal/db/proxy/postgres/classifiers.go`: + +```go +//go:build linux + +package postgres + +import ( + "fmt" + + classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres" +) + +// buildClassifierMap constructs one Parser per distinct dialect across the +// supplied services. Services sharing a dialect share a Parser instance. +// Returns an error when any service's Dialect is not a recognized name. 
+func buildClassifierMap(svcs []Service) (map[string]classify_pg.Parser, error) { + out := make(map[string]classify_pg.Parser, 4) + for _, svc := range svcs { + if _, ok := out[svc.Dialect]; ok { + continue + } + d, ok := classify_pg.ParseDialect(svc.Dialect) + if !ok { + return nil, fmt.Errorf("postgres.New: services[%q].Dialect = %q is not a recognized dialect", + svc.Name, svc.Dialect) + } + out[svc.Dialect] = classify_pg.New(d) + } + return out, nil +} + +// classifierFor returns the parser registered for the given dialect. Falls +// back to the "postgres" parser if a lookup fails — buildClassifierMap +// validated dialects at New(), so this should not happen in practice. +// classifierForTest, when set on Config, overrides the map entirely. +func (s *Server) classifierFor(dialect string) classify_pg.Parser { + if s.cfg.classifierForTest != nil { + return s.cfg.classifierForTest(dialect) + } + if p, ok := s.classifiers[dialect]; ok { + return p + } + return s.classifiers["postgres"] +} +``` + +- [ ] **Step 7: Create `classifiers_test.go`** + +Create `internal/db/proxy/postgres/classifiers_test.go`: + +```go +//go:build linux + +package postgres + +import ( + "testing" + + classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres" +) + +func TestBuildClassifierMap_PerDialect(t *testing.T) { + svcs := []Service{ + {Name: "a", Family: "postgres", Dialect: "postgres", Listen: ServiceListener{Kind: "unix", Path: "/tmp/a"}}, + {Name: "b", Family: "postgres", Dialect: "postgres", Listen: ServiceListener{Kind: "unix", Path: "/tmp/b"}}, + {Name: "c", Family: "postgres", Dialect: "cockroachdb", Listen: ServiceListener{Kind: "unix", Path: "/tmp/c"}}, + } + m, err := buildClassifierMap(svcs) + if err != nil { + t.Fatalf("buildClassifierMap: %v", err) + } + if len(m) != 2 { + t.Fatalf("map size = %d want 2 (postgres, cockroachdb)", len(m)) + } + if m["postgres"] == nil || m["cockroachdb"] == nil { + t.Fatalf("expected entries for both dialects, got %+v", m) + } 
+} + +func TestBuildClassifierMap_RejectsUnknown(t *testing.T) { + _, err := buildClassifierMap([]Service{ + {Name: "x", Family: "postgres", Dialect: "rabbitql"}, + }) + if err == nil { + t.Fatalf("expected error for unknown dialect, got nil") + } +} + +func TestServer_ClassifierFor_TestHookOverride(t *testing.T) { + calls := 0 + hook := func(dialect string) classify_pg.Parser { + calls++ + return classify_pg.New(classify_pg.DialectPostgres) + } + s := newTestServer(t, + withService(testService(t, "appdb", "postgres")), + func(c *Config) { c.classifierForTest = hook }, + ) + _ = s.classifierFor("postgres") + _ = s.classifierFor("anything") + if calls != 2 { + t.Fatalf("hook called %d times, want 2", calls) + } +} +``` + +- [ ] **Step 8: Run all tests to confirm they pass** + +Run: `go test ./internal/db/proxy/postgres/ -count=1` +Expected: all green. + +- [ ] **Step 9: Cross-compile check** + +Run: `GOOS=windows go build ./...` +Expected: clean. + +- [ ] **Step 10: Commit** + +```bash +git add internal/db/proxy/postgres/server.go \ + internal/db/proxy/postgres/server_test.go \ + internal/db/proxy/postgres/classifiers.go \ + internal/db/proxy/postgres/classifiers_test.go +git commit -m "db: proxy — MaxQueryBytes, dialect map, atomic policy on Server" +``` + +--- + +## Task 6: Extend `connState` and capture `'Z'` byte in `forwardAuth` + +**Why:** The Simple Query loop needs to know the most recent upstream `'Z'` status byte to gate deny synthesis, plus the redaction tier and TLS mode for event building. We piggy-back on `forwardAuth` recording the byte before it returns (it already has the frame in scope). 
+ +**Files:** +- Modify: `internal/db/proxy/postgres/proxyconn.go` +- Modify: `internal/db/proxy/postgres/authforward.go` +- Modify: `internal/db/proxy/postgres/authforward_test.go` + +- [ ] **Step 1: Write failing test asserting `'Z'` byte capture** + +Append to `internal/db/proxy/postgres/authforward_test.go`: + +```go +func TestForwardAuth_CapturesUpstreamRFQByte(t *testing.T) { + // Use the existing forwardAuth test scaffold: fake upstream that + // sends AuthenticationOk + ReadyForQuery{TxStatus: 'I'} after one + // client message. After forwardAuth returns, connState.lastUpstreamRFQ + // must equal 'I'. + pc, fake := newForwardAuthFixture(t, withInitialServerScript([]pgproto3.BackendMessage{ + &pgproto3.AuthenticationOk{}, + &pgproto3.ReadyForQuery{TxStatus: 'I'}, + })) + defer fake.Close() + if err := forwardAuth(context.Background(), pc); err != nil { + t.Fatalf("forwardAuth: %v", err) + } + if pc.state.lastUpstreamRFQ != 'I' { + t.Fatalf("lastUpstreamRFQ = %q want 'I'", pc.state.lastUpstreamRFQ) + } +} +``` + +`newForwardAuthFixture` and `withInitialServerScript` are existing helpers from 04b₂'s `authforward_test.go` — extend if needed. + +- [ ] **Step 2: Run test to verify failure** + +Run: `go test ./internal/db/proxy/postgres/ -run TestForwardAuth_CapturesUpstreamRFQByte -count=1` +Expected: build error referencing `pc.state.lastUpstreamRFQ`. + +- [ ] **Step 3: Extend `connState`** + +Modify `internal/db/proxy/postgres/proxyconn.go`'s `connState` struct, adding three fields: + +```go +type connState struct { + // ...existing fields unchanged... + + lastUpstreamRFQ byte // 'I' | 'T' | 'E' | 0 (pre-auth) + redactionTier policy.RedactionTier // resolved at handshake end + tlsMode string // svc.TLSMode at handshake end, for EventTLS.Mode +} +``` + +Add the import `"github.com/agentsh/agentsh/internal/db/policy"` if not already present. + +- [ ] **Step 4: Capture the byte in `forwardAuth`** + +Modify `internal/db/proxy/postgres/authforward.go`. 
In `forwardUpstreamToClientUntilRFQ`, the `*pgproto3.ReadyForQuery` arm currently looks like: + +```go + case *pgproto3.ReadyForQuery: + pc.backend.Send(m) + if err := pc.backend.Flush(); err != nil { + return fmt.Errorf("flush after RFQ: %w", err) + } + return nil +``` + +Replace with: + +```go + case *pgproto3.ReadyForQuery: + pc.state.lastUpstreamRFQ = m.TxStatus + pc.backend.Send(m) + if err := pc.backend.Flush(); err != nil { + return fmt.Errorf("flush after RFQ: %w", err) + } + return nil +``` + +- [ ] **Step 5: Run tests to confirm passing** + +Run: `go test ./internal/db/proxy/postgres/ -count=1 -run TestForwardAuth` +Expected: all PASS, including the new one. Pre-existing forward-auth tests must still pass. + +- [ ] **Step 6: Commit** + +```bash +git add internal/db/proxy/postgres/proxyconn.go \ + internal/db/proxy/postgres/authforward.go \ + internal/db/proxy/postgres/authforward_test.go +git commit -m "db: proxy — capture upstream RFQ status byte into connState" +``` + +--- + +## Task 7: `simpleQueryLoop` scaffold + frame dispatch + non-Q reject + +**Why:** Establish the loop's outer skeleton — read frames, dispatch `'Q'`/`'X'`/other — with the non-Q reject path complete (lifecycle event, synthetic `ErrorResponse(0A000)`). `handleQuery` is a stub that synthesizes `ErrorResponse(58030, "handleQuery not implemented yet")` so tests for the dispatcher can run before subsequent tasks fill `handleQuery` in. 
+ +**Files:** +- Create: `internal/db/proxy/postgres/simplequery.go` +- Create: `internal/db/proxy/postgres/simplequery_test.go` +- Modify: `internal/db/proxy/postgres/proxyconn.go` (lifecycle-emit helpers) + +- [ ] **Step 1: Write failing test asserting non-Q reject** + +Create `internal/db/proxy/postgres/simplequery_test.go`: + +```go +//go:build linux + +package postgres + +import ( + "context" + "testing" + + "github.com/jackc/pgx/v5/pgproto3" + + "github.com/agentsh/agentsh/internal/db/events" +) + +func TestSimpleQueryLoop_RejectsExtendedQuery(t *testing.T) { + pc, clientSide, sink := newSimpleQueryFixture(t) + pc.state.lastUpstreamRFQ = 'I' + + // Send Parse, which Plan 04c rejects. + parse := &pgproto3.Parse{Name: "s1", Query: "SELECT 1"} + mustSendFromClient(t, clientSide, parse) + + if err := pc.simpleQueryLoop(context.Background()); err == nil { + t.Fatalf("simpleQueryLoop: want non-nil error on extended-query frame") + } + + msg := mustReceiveClientFrame(t, clientSide) + er, ok := msg.(*pgproto3.ErrorResponse) + if !ok { + t.Fatalf("unexpected first frame: %T", msg) + } + if er.SQLState != "0A000" { + t.Fatalf("SQLState = %q want 0A000", er.SQLState) + } + + events := sink.DrainLifecycle() + if len(events) != 1 || events[0].Kind != "db_handshake_fail" { + t.Fatalf("lifecycle events = %+v", events) + } + if events[0].ErrorCode != "EXTENDED_QUERY_NOT_SUPPORTED" { + t.Fatalf("ErrorCode = %q want EXTENDED_QUERY_NOT_SUPPORTED", events[0].ErrorCode) + } +} + +func TestSimpleQueryLoop_RejectsFunctionCall(t *testing.T) { + pc, clientSide, sink := newSimpleQueryFixture(t) + pc.state.lastUpstreamRFQ = 'I' + + mustSendFromClient(t, clientSide, &pgproto3.FunctionCall{Function: 1234}) + + if err := pc.simpleQueryLoop(context.Background()); err == nil { + t.Fatalf("simpleQueryLoop: want non-nil error on FunctionCall") + } + + msg := mustReceiveClientFrame(t, clientSide) + er, ok := msg.(*pgproto3.ErrorResponse) + if !ok { + t.Fatalf("unexpected first frame: %T", msg) 
+ } + if er.SQLState != "42501" { + t.Fatalf("SQLState = %q want 42501", er.SQLState) + } + + evs := sink.DrainLifecycle() + _ = evs // shape: db_handshake_fail with FUNCTION_CALL_PROTOCOL_DENIED + if len(evs) != 1 || evs[0].ErrorCode != "FUNCTION_CALL_PROTOCOL_DENIED" { + t.Fatalf("lifecycle events = %+v", evs) + } +} + +func TestSimpleQueryLoop_TerminateForwarded(t *testing.T) { + pc, clientSide, _ := newSimpleQueryFixtureWithUpstream(t) + pc.state.lastUpstreamRFQ = 'I' + + mustSendFromClient(t, clientSide, &pgproto3.Terminate{}) + + if err := pc.simpleQueryLoop(context.Background()); err != nil { + t.Fatalf("simpleQueryLoop on Terminate: %v", err) + } + // fake upstream's read side will have received a Terminate; assertion + // lives in newSimpleQueryFixtureWithUpstream's cleanup. + _ = events.LifecycleEvent{} +} +``` + +Helper `newSimpleQueryFixture` / `newSimpleQueryFixtureWithUpstream` / `mustSendFromClient` / `mustReceiveClientFrame` / `sink.DrainLifecycle()` go in a new fixture block in the same file: + +```go +func newSimpleQueryFixture(t *testing.T) (*proxyConn, *pgproto3.Frontend, *events.SyncSink) { + t.Helper() + clientPipe, proxyPipe := net.Pipe() + t.Cleanup(func() { _ = clientPipe.Close(); _ = proxyPipe.Close() }) + sink := &events.SyncSink{} + svc := Service{Name: "test", Family: "postgres", Dialect: "postgres", TLSMode: "terminate_reissue"} + srv := newTestServer(t, withService(svc), withSink(sink)) + pc := newProxyConn(srv, svc, proxyPipe, uint32(os.Getuid())) + clientFE := pgproto3.NewFrontend(clientPipe, clientPipe) + return pc, clientFE, sink +} + +func newSimpleQueryFixtureWithUpstream(t *testing.T) (*proxyConn, *pgproto3.Frontend, *events.SyncSink) { + pc, clientFE, sink := newSimpleQueryFixture(t) + upPipeClient, upPipeServer := net.Pipe() + t.Cleanup(func() { _ = upPipeClient.Close(); _ = upPipeServer.Close() }) + pc.state.upstream = upPipeServer + pc.state.upstreamFE = pgproto3.NewFrontend(upPipeServer, upPipeServer) + // Drain anything 
the proxy sends upstream to avoid blocking. + go func() { + b := make([]byte, 4096) + for { + if _, err := upPipeClient.Read(b); err != nil { + return + } + } + }() + return pc, clientFE, sink +} + +func mustSendFromClient(t *testing.T, fe *pgproto3.Frontend, m pgproto3.FrontendMessage) { + t.Helper() + fe.Send(m) + if err := fe.Flush(); err != nil { + t.Fatalf("client send: %v", err) + } +} + +func mustReceiveClientFrame(t *testing.T, fe *pgproto3.Frontend) pgproto3.BackendMessage { + t.Helper() + m, err := fe.Receive() + if err != nil { + t.Fatalf("client recv: %v", err) + } + return m +} +``` + +Add `events.SyncSink.DrainLifecycle()` if it does not exist yet — check `internal/db/events/sink.go` and add a sibling to `Drain()`. + +- [ ] **Step 2: Run test to verify failure** + +Run: `go test ./internal/db/proxy/postgres/ -run TestSimpleQueryLoop -count=1` +Expected: build errors referencing `pc.simpleQueryLoop`, the emit helpers, and possibly `DrainLifecycle`. + +- [ ] **Step 3: Add lifecycle-emit helpers to `proxyconn.go`** + +Append to `internal/db/proxy/postgres/proxyconn.go`: + +```go +// emitFrameTooLarge emits a db_handshake_fail event with error_code +// FRAME_TOO_LARGE. Used when the client sends a 'Q' body above MaxQueryBytes. +func (pc *proxyConn) emitFrameTooLarge(ctx context.Context, size int) { + if pc.srv.cfg.Sink == nil { + return + } + _ = pc.srv.cfg.Sink.EmitLifecycle(ctx, events.LifecycleEvent{ + EventID: newEventID(), + Timestamp: timeNow(), + DBService: pc.svc.Name, + ClientIdentity: pc.state.clientIdentity, + Kind: "db_handshake_fail", + ErrorCode: "FRAME_TOO_LARGE", + Reason: fmt.Sprintf("statement too large for AgentSH proxy: %d bytes > %d cap", size, pc.srv.cfg.MaxQueryBytes), + PeerUID: pc.state.peerUID, + }) +} + +// emitUnsupportedFrame emits a db_handshake_fail event when the client sends +// a Plan-05 frame (Parse/Bind/Describe/Execute/Sync/Flush/Close/FunctionCall) +// post-handshake. 
errorCode distinguishes FUNCTION_CALL_PROTOCOL_DENIED from +// the generic EXTENDED_QUERY_NOT_SUPPORTED. +func (pc *proxyConn) emitUnsupportedFrame(ctx context.Context, errorCode, frameType string) { + if pc.srv.cfg.Sink == nil { + return + } + _ = pc.srv.cfg.Sink.EmitLifecycle(ctx, events.LifecycleEvent{ + EventID: newEventID(), + Timestamp: timeNow(), + DBService: pc.svc.Name, + ClientIdentity: pc.state.clientIdentity, + Kind: "db_handshake_fail", + ErrorCode: errorCode, + Reason: "frame " + frameType + " not supported in AgentSH proxy phase 1", + PeerUID: pc.state.peerUID, + }) +} +``` + +Add `"fmt"` import if not present. + +- [ ] **Step 4: Create `simplequery.go` scaffold** + +Create `internal/db/proxy/postgres/simplequery.go`: + +```go +//go:build linux + +package postgres + +import ( + "context" + "errors" + "fmt" + + "github.com/jackc/pgx/v5/pgproto3" +) + +var ( + errInTxTerminate = errors.New("postgres.simpleQueryLoop: in-tx deny terminated connection") + errFrameTooLargeClose = errors.New("postgres.simpleQueryLoop: frame budget exceeded; conn closed") + errUnsupportedFrame = errors.New("postgres.simpleQueryLoop: unsupported frame type; conn closed") +) + +// simpleQueryLoop is the post-handshake driver. It reads client frames one at +// a time, dispatches to handleQuery for 'Q', forwards 'X' (Terminate), and +// rejects any other frame with a synthetic ErrorResponse. 
+func (pc *proxyConn) simpleQueryLoop(ctx context.Context) error { + for { + if err := ctx.Err(); err != nil { + return err + } + msg, err := pc.backend.Receive() + if err != nil { + return err + } + switch m := msg.(type) { + case *pgproto3.Query: + if err := pc.handleQuery(ctx, m); err != nil { + return err + } + case *pgproto3.Terminate: + if pc.state.upstreamFE != nil { + pc.state.upstreamFE.Send(m) + _ = pc.state.upstreamFE.Flush() + } + return nil + default: + return pc.handleUnsupportedFrame(ctx, m) + } + } +} + +// handleUnsupportedFrame synthesizes ErrorResponse for any non-Q/non-X +// post-handshake frame and closes the connection. Distinguishes +// FunctionCall (PG 42501) from generic extended-query frames (0A000). +func (pc *proxyConn) handleUnsupportedFrame(ctx context.Context, msg pgproto3.FrontendMessage) error { + frameType := fmt.Sprintf("%T", msg) + if _, isFunc := msg.(*pgproto3.FunctionCall); isFunc { + pc.emitUnsupportedFrame(ctx, "FUNCTION_CALL_PROTOCOL_DENIED", "FunctionCall") + _ = pc.synthesizeError("42501", "FunctionCall sub-protocol denied by AgentSH policy") + return errUnsupportedFrame + } + pc.emitUnsupportedFrame(ctx, "EXTENDED_QUERY_NOT_SUPPORTED", frameType) + _ = pc.synthesizeError("0A000", "Extended Query / COPY / FunctionCall not supported in AgentSH proxy phase 1") + return errUnsupportedFrame +} + +// handleQuery is filled in by Task 12 (allow) and Task 13 (deny). For now +// it returns an error to keep the loop progressing in tests. +func (pc *proxyConn) handleQuery(ctx context.Context, q *pgproto3.Query) error { + _ = ctx + return pc.synthesizeError("58030", "handleQuery not yet implemented in scaffold") +} +``` + +`pc.synthesizeError` already exists (used by 04b₂'s handshake) — it writes `ErrorResponse{SQLState, Message}` and flushes. 
+ +- [ ] **Step 5: Add `SyncSink.DrainLifecycle`** + +Modify `internal/db/events/sink.go` to add (next to `Drain`): + +```go +// DrainLifecycle returns and clears all lifecycle events captured so far. +func (s *SyncSink) DrainLifecycle() []LifecycleEvent { + s.mu.Lock() + defer s.mu.Unlock() + out := s.lifecycle + s.lifecycle = nil + return out +} +``` + +If `SyncSink` does not yet have a `lifecycle []LifecycleEvent` field, add it and have `EmitLifecycle` append to it (mirror the existing `EmitStatement` pattern). + +- [ ] **Step 6: Run tests to confirm passing** + +Run: `go test ./internal/db/proxy/postgres/ -run TestSimpleQueryLoop -count=1 -v` +Expected: three subtests PASS. + +- [ ] **Step 7: Commit** + +```bash +git add internal/db/proxy/postgres/simplequery.go \ + internal/db/proxy/postgres/simplequery_test.go \ + internal/db/proxy/postgres/proxyconn.go \ + internal/db/events/sink.go +git commit -m "db: proxy — simpleQueryLoop scaffold + reject non-Q/non-X frames" +``` + +--- + +## Task 8: Frame budget cap (`MaxQueryBytes`) in `handleQuery` + +**Why:** Reject `'Q'` bodies above `MaxQueryBytes` with `ErrorResponse(54000)` and a `FRAME_TOO_LARGE` lifecycle event. Done before classifier work so subsequent tasks don't have to repeatedly handle the over-cap case. 
+ +**Files:** +- Modify: `internal/db/proxy/postgres/simplequery.go` +- Modify: `internal/db/proxy/postgres/simplequery_test.go` + +- [ ] **Step 1: Write failing test** + +Append to `internal/db/proxy/postgres/simplequery_test.go`: + +```go +func TestHandleQuery_FrameTooLarge(t *testing.T) { + pc, clientFE, sink := newSimpleQueryFixture(t) + pc.state.lastUpstreamRFQ = 'I' + pc.srv.cfg.MaxQueryBytes = 32 + + big := &pgproto3.Query{String: strings.Repeat("SELECT 1; ", 10)} // > 32 bytes + mustSendFromClient(t, clientFE, big) + + if err := pc.simpleQueryLoop(context.Background()); err == nil { + t.Fatalf("simpleQueryLoop on oversized Q: want err, got nil") + } + + msg := mustReceiveClientFrame(t, clientFE) + er, ok := msg.(*pgproto3.ErrorResponse) + if !ok || er.SQLState != "54000" { + t.Fatalf("expected ErrorResponse(54000), got %T %+v", msg, msg) + } + + rfq := mustReceiveClientFrame(t, clientFE) + if _, ok := rfq.(*pgproto3.ReadyForQuery); !ok { + t.Fatalf("expected ReadyForQuery after FRAME_TOO_LARGE, got %T", rfq) + } + + ev := sink.DrainLifecycle() + if len(ev) != 1 || ev[0].ErrorCode != "FRAME_TOO_LARGE" { + t.Fatalf("lifecycle = %+v", ev) + } +} +``` + +Add `"strings"` import if missing. + +- [ ] **Step 2: Run test to verify failure** + +Run: `go test ./internal/db/proxy/postgres/ -run TestHandleQuery_FrameTooLarge -count=1` +Expected: FAIL — current stub returns 58030, not 54000. + +- [ ] **Step 3: Add the cap check to `handleQuery`** + +Replace `handleQuery` in `internal/db/proxy/postgres/simplequery.go`: + +```go +func (pc *proxyConn) handleQuery(ctx context.Context, q *pgproto3.Query) error { + if len(q.String) > pc.srv.cfg.MaxQueryBytes { + pc.emitFrameTooLarge(ctx, len(q.String)) + _ = pc.synthErrorAndRFQ("54000", + fmt.Sprintf("statement too large for AgentSH proxy: %d bytes > %d cap", + len(q.String), pc.srv.cfg.MaxQueryBytes)) + return errFrameTooLargeClose + } + // Allow/deny paths filled in by later tasks. 
+ return pc.synthesizeError("58030", "handleQuery not yet implemented in scaffold") +} +``` + +`synthErrorAndRFQ` does not exist yet — declared in Task 10 (`deny.go`). For Task 8 we use a placeholder until then: open `internal/db/proxy/postgres/deny.go`, no — to avoid a forward-reference, inline the synth here in this task: + +```go +func (pc *proxyConn) synthErrorAndRFQTmp(sqlstate, msg string) error { + pc.backend.Send(&pgproto3.ErrorResponse{Severity: "ERROR", SQLState: sqlstate, Message: msg}) + pc.backend.Send(&pgproto3.ReadyForQuery{TxStatus: 'I'}) + return pc.backend.Flush() +} +``` + +And call `pc.synthErrorAndRFQTmp(...)` in `handleQuery`. Task 10 replaces the `_Tmp` helper and updates callers. + +- [ ] **Step 4: Run tests to confirm** + +Run: `go test ./internal/db/proxy/postgres/ -run TestHandleQuery_FrameTooLarge -count=1 -v` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add internal/db/proxy/postgres/simplequery.go \ + internal/db/proxy/postgres/simplequery_test.go +git commit -m "db: proxy — enforce MaxQueryBytes frame budget" +``` + +--- + +## Task 9: `upstreamread.go` — per-frame demux + counter accumulation + +**Why:** Implement the allow-forward response reader as a pure(-ish) function that reads upstream frames one at a time, forwards each to the client, accumulates per-stmt counters from `CommandComplete` tags and `DataRow` frames, and returns once `'Z'` arrives (updating `lastUpstreamRFQ`). 
+ +**Files:** +- Create: `internal/db/proxy/postgres/upstreamread.go` +- Create: `internal/db/proxy/postgres/upstreamread_test.go` + +- [ ] **Step 1: Write failing tests** + +Create `internal/db/proxy/postgres/upstreamread_test.go`: + +```go +//go:build linux + +package postgres + +import ( + "context" + "net" + "strings" + "testing" + "time" + + "github.com/jackc/pgx/v5/pgproto3" +) + +func TestParseCommandTag(t *testing.T) { + cases := []struct { + tag string + wantRows, wantAff *int64 + }{ + {"SELECT 7", i64ptr(7), nil}, + {"INSERT 0 5", nil, i64ptr(5)}, + {"UPDATE 3", nil, i64ptr(3)}, + {"DELETE 2", nil, i64ptr(2)}, + {"MOVE 0", nil, i64ptr(0)}, + {"COPY 4", nil, i64ptr(4)}, + {"CREATE TABLE", nil, nil}, + {"BEGIN", nil, nil}, + {"COMMIT", nil, nil}, + } + for _, tc := range cases { + t.Run(tc.tag, func(t *testing.T) { + gotRows, gotAff := parseCommandTag(tc.tag) + if !i64eq(gotRows, tc.wantRows) || !i64eq(gotAff, tc.wantAff) { + t.Fatalf("parseCommandTag(%q) = (%v, %v) want (%v, %v)", + tc.tag, gotRows, gotAff, tc.wantRows, tc.wantAff) + } + }) + } +} + +func TestForwardUpstreamUntilRFQ_HappyPath(t *testing.T) { + pc, clientFE, _, upstreamScript := newUpstreamReadFixture(t) + pc.state.lastUpstreamRFQ = 'I' + upstreamScript([]pgproto3.BackendMessage{ + &pgproto3.RowDescription{Fields: []pgproto3.FieldDescription{{Name: []byte("a")}}}, + &pgproto3.DataRow{Values: [][]byte{[]byte("1")}}, + &pgproto3.DataRow{Values: [][]byte{[]byte("2")}}, + &pgproto3.CommandComplete{CommandTag: []byte("SELECT 2")}, + &pgproto3.ReadyForQuery{TxStatus: 'I'}, + }) + + r, err := pc.forwardUpstreamUntilRFQ(context.Background(), time.Now(), 16) + if err != nil { + t.Fatalf("forwardUpstreamUntilRFQ: %v", err) + } + if len(r.RowsByStmt) != 1 || r.RowsByStmt[0] != 2 { + t.Fatalf("RowsByStmt = %v want [2]", r.RowsByStmt) + } + if len(r.AffectedByStmt) != 1 || r.AffectedByStmt[0] != nil { + t.Fatalf("AffectedByStmt = %v want [nil]", r.AffectedByStmt) + } + if r.ErrorCode != "" { + 
t.Fatalf("ErrorCode = %q want empty", r.ErrorCode) + } + if pc.state.lastUpstreamRFQ != 'I' { + t.Fatalf("lastUpstreamRFQ = %q want 'I'", pc.state.lastUpstreamRFQ) + } + // Drain client side: every upstream frame should have been forwarded. + for range 5 { + _ = mustReceiveClientFrame(t, clientFE) + } +} + +func TestForwardUpstreamUntilRFQ_MultiStmt(t *testing.T) { + pc, _, _, upstreamScript := newUpstreamReadFixture(t) + pc.state.lastUpstreamRFQ = 'I' + upstreamScript([]pgproto3.BackendMessage{ + &pgproto3.CommandComplete{CommandTag: []byte("INSERT 0 3")}, + &pgproto3.CommandComplete{CommandTag: []byte("INSERT 0 5")}, + &pgproto3.ReadyForQuery{TxStatus: 'T'}, + }) + r, err := pc.forwardUpstreamUntilRFQ(context.Background(), time.Now(), 64) + if err != nil { + t.Fatalf("err: %v", err) + } + if len(r.AffectedByStmt) != 2 { + t.Fatalf("AffectedByStmt = %v want 2 entries", r.AffectedByStmt) + } + if *r.AffectedByStmt[0] != 3 || *r.AffectedByStmt[1] != 5 { + t.Fatalf("AffectedByStmt = %v want [3,5]", r.AffectedByStmt) + } + if pc.state.lastUpstreamRFQ != 'T' { + t.Fatalf("lastUpstreamRFQ = %q want 'T'", pc.state.lastUpstreamRFQ) + } +} + +func TestForwardUpstreamUntilRFQ_MidBatchError(t *testing.T) { + pc, _, _, upstreamScript := newUpstreamReadFixture(t) + pc.state.lastUpstreamRFQ = 'I' + upstreamScript([]pgproto3.BackendMessage{ + &pgproto3.CommandComplete{CommandTag: []byte("INSERT 0 3")}, + &pgproto3.ErrorResponse{Severity: "ERROR", SQLState: "23505", Message: "dup key"}, + &pgproto3.ReadyForQuery{TxStatus: 'E'}, + }) + r, err := pc.forwardUpstreamUntilRFQ(context.Background(), time.Now(), 64) + if err != nil { + t.Fatalf("err: %v", err) + } + if r.ErrorCode != "23505" { + t.Fatalf("ErrorCode = %q want 23505", r.ErrorCode) + } + if pc.state.lastUpstreamRFQ != 'E' { + t.Fatalf("lastUpstreamRFQ = %q want 'E'", pc.state.lastUpstreamRFQ) + } +} + +func i64ptr(v int64) *int64 { return &v } +func i64eq(a, b *int64) bool { + if a == nil || b == nil { + return a == b + } + 
return *a == *b +} + +func newUpstreamReadFixture(t *testing.T) (*proxyConn, *pgproto3.Frontend, net.Conn, func([]pgproto3.BackendMessage)) { + pc, clientFE, _ := newSimpleQueryFixture(t) + up1, up2 := net.Pipe() + t.Cleanup(func() { _ = up1.Close(); _ = up2.Close() }) + pc.state.upstream = up2 + pc.state.upstreamFE = pgproto3.NewFrontend(up2, up2) + script := func(msgs []pgproto3.BackendMessage) { + go func() { + be := pgproto3.NewBackend(up1, up1) + for _, m := range msgs { + be.Send(m) + } + _ = be.Flush() + }() + } + // Drain anything the proxy sends to clientFE in the background so writes + // in forwardUpstreamUntilRFQ don't block; tests that need to inspect them + // can reach into clientFE before the drain runs. + go func() { + _, _ = clientFE.Receive() + }() + return pc, clientFE, up1, script +} +``` + +- [ ] **Step 2: Run tests to verify failure** + +Run: `go test ./internal/db/proxy/postgres/ -run "TestParseCommandTag|TestForwardUpstreamUntilRFQ" -count=1` +Expected: build errors referencing `forwardUpstreamUntilRFQ` and `parseCommandTag`. + +- [ ] **Step 3: Implement `upstreamread.go`** + +Create `internal/db/proxy/postgres/upstreamread.go`: + +```go +//go:build linux + +package postgres + +import ( + "context" + "fmt" + "strconv" + "strings" + "time" + + "github.com/jackc/pgx/v5/pgproto3" +) + +// upstreamResult collects counters and final state from one Q...Z round-trip. +// Per-statement counters live in slices indexed by the order CommandComplete +// frames arrived in. Statements that did not produce a CommandComplete frame +// (mid-batch ErrorResponse aborted them) get null counters at event-build time. +type upstreamResult struct { + BytesOut int64 + RowsByStmt []*int64 + AffectedByStmt []*int64 + LatencyMs int64 + ErrorCode string +} + +// forwardUpstreamUntilRFQ reads upstream frames one at a time and forwards +// each to the client. Returns when the upstream sends ReadyForQuery, updating +// pc.state.lastUpstreamRFQ. 
Updates upstreamResult counters as it goes. +// +// bytesIn is the inbound 'Q' frame body length (the caller knows it; we just +// pass it through for completeness — currently unused inside this function, +// but the spine and event-builder use it for the per-stmt Result struct). +func (pc *proxyConn) forwardUpstreamUntilRFQ(ctx context.Context, sentAt time.Time, bytesIn int) (upstreamResult, error) { + _ = bytesIn // attribution belongs to the caller + var r upstreamResult + var curRows int64 + curRowsSet := false + + for { + if err := ctx.Err(); err != nil { + return r, err + } + msg, err := pc.state.upstreamFE.Receive() + if err != nil { + return r, fmt.Errorf("upstream recv: %w", err) + } + + switch m := msg.(type) { + case *pgproto3.DataRow: + curRows++ + curRowsSet = true + r.BytesOut += int64(estimatedFrameSize(m)) + pc.backend.Send(m) + + case *pgproto3.CommandComplete: + rows, aff := parseCommandTag(string(m.CommandTag)) + if curRowsSet && rows == nil { + rows = i64ptr(curRows) + } + r.RowsByStmt = append(r.RowsByStmt, rows) + r.AffectedByStmt = append(r.AffectedByStmt, aff) + curRows, curRowsSet = 0, false + r.BytesOut += int64(estimatedFrameSize(m)) + pc.backend.Send(m) + + case *pgproto3.ErrorResponse: + if r.ErrorCode == "" { + r.ErrorCode = m.SQLState + } + r.BytesOut += int64(estimatedFrameSize(m)) + pc.backend.Send(m) + + case *pgproto3.ReadyForQuery: + pc.state.lastUpstreamRFQ = m.TxStatus + r.BytesOut += int64(estimatedFrameSize(m)) + pc.backend.Send(m) + if err := pc.backend.Flush(); err != nil { + return r, fmt.Errorf("flush after RFQ: %w", err) + } + r.LatencyMs = time.Since(sentAt).Milliseconds() + return r, nil + + default: + // RowDescription / ParameterStatus / NoticeResponse / NotificationResponse / + // ParameterDescription / etc. — forward verbatim with no counter effect. + r.BytesOut += int64(estimatedFrameSize(m)) + pc.backend.Send(m) + } + } +} + +// estimatedFrameSize is an approximation good enough for BytesOut accounting. 
+// pgproto3 does not expose a public encoded-length helper, so we encode into +// a scratch slice. This is on the hot path; if performance becomes a concern +// a future plan can swap for a per-type length-by-fields path. +func estimatedFrameSize(m pgproto3.BackendMessage) int { + buf := m.Encode(nil) + return len(buf) +} + +// parseCommandTag parses the PostgreSQL CommandComplete tag string. Returns +// (rowsReturned, rowsAffected) — only one is non-nil for any given tag, +// except utility tags ("BEGIN", "CREATE TABLE") which return (nil, nil). +// +// Recognized prefixes: +// SELECT → (n, nil) +// INSERT → (nil, n) +// UPDATE → (nil, n) +// DELETE → (nil, n) +// MOVE → (nil, n) +// FETCH → (nil, n) +// COPY → (nil, n) +func parseCommandTag(tag string) (rows *int64, affected *int64) { + fields := strings.Fields(tag) + if len(fields) == 0 { + return nil, nil + } + parseN := func(s string) *int64 { + n, err := strconv.ParseInt(s, 10, 64) + if err != nil { + return nil + } + return &n + } + switch fields[0] { + case "SELECT": + if len(fields) >= 2 { + return parseN(fields[1]), nil + } + case "INSERT": + if len(fields) >= 3 { + return nil, parseN(fields[2]) + } + case "UPDATE", "DELETE", "MOVE", "FETCH", "COPY": + if len(fields) >= 2 { + return nil, parseN(fields[1]) + } + } + return nil, nil +} +``` + +- [ ] **Step 4: Run tests to confirm passing** + +Run: `go test ./internal/db/proxy/postgres/ -run "TestParseCommandTag|TestForwardUpstreamUntilRFQ" -count=1 -v` +Expected: all PASS. + +- [ ] **Step 5: Commit** + +```bash +git add internal/db/proxy/postgres/upstreamread.go \ + internal/db/proxy/postgres/upstreamread_test.go +git commit -m "db: proxy — upstreamread per-frame demux + counter accumulation" +``` + +--- + +## Task 10: `deny.go` — synth helpers + SQLSTATE picker + +**Why:** Centralize the wire-side synthesis used by both pre-tx-deny (ErrorResponse + RFQ('I')) and in-tx-deny (ErrorResponse only). `pickDenySynth` chooses SQLSTATE by rule kind. 
Replaces the `synthErrorAndRFQTmp` helper introduced in Task 8. + +**Files:** +- Create: `internal/db/proxy/postgres/deny.go` +- Create: `internal/db/proxy/postgres/deny_test.go` +- Modify: `internal/db/proxy/postgres/simplequery.go` (drop the `_Tmp` helper) + +- [ ] **Step 1: Write failing tests** + +Create `internal/db/proxy/postgres/deny_test.go`: + +```go +//go:build linux + +package postgres + +import ( + "testing" + + "github.com/jackc/pgx/v5/pgproto3" + + "github.com/agentsh/agentsh/internal/db/policy" +) + +func TestSynthErrorAndRFQ_WritesErrorThenRFQI(t *testing.T) { + pc, clientFE, _ := newSimpleQueryFixture(t) + + if err := pc.synthErrorAndRFQ("42501", "denied"); err != nil { + t.Fatalf("synth: %v", err) + } + m1 := mustReceiveClientFrame(t, clientFE) + er, ok := m1.(*pgproto3.ErrorResponse) + if !ok { + t.Fatalf("first frame = %T want ErrorResponse", m1) + } + if er.SQLState != "42501" || er.Message != "denied" || er.Severity != "ERROR" { + t.Fatalf("ErrorResponse = %+v", er) + } + m2 := mustReceiveClientFrame(t, clientFE) + rfq, ok := m2.(*pgproto3.ReadyForQuery) + if !ok { + t.Fatalf("second frame = %T want ReadyForQuery", m2) + } + if rfq.TxStatus != 'I' { + t.Fatalf("RFQ TxStatus = %q want 'I'", rfq.TxStatus) + } +} + +func TestSynthErrorOnly_NoTrailingRFQ(t *testing.T) { + pc, clientFE, _ := newSimpleQueryFixture(t) + + if err := pc.synthErrorOnly("42501", "in-tx"); err != nil { + t.Fatalf("synth: %v", err) + } + er := mustReceiveClientFrame(t, clientFE).(*pgproto3.ErrorResponse) + if er.Message != "in-tx" { + t.Fatalf("Message = %q", er.Message) + } + // Subsequent Receive must time out or return EOF; we close the client side + // and assert. 
+ if err := clientFE.SetDeadline(timeNow().Add(50)); err != nil { _ = err } +} + +func TestPickDenySynth_FirstDenyWins(t *testing.T) { + decisions := []policy.Decision{ + {Verb: policy.VerbAllow}, + {Verb: policy.VerbDeny, RuleKind: policy.RuleKindStatement, RuleName: "no-deletes", Reason: "delete denied"}, + {Verb: policy.VerbDeny, RuleKind: policy.RuleKindStatement, RuleName: "no-truncates"}, + } + rendered, sqlstate := pickDenySynth(decisions) + if sqlstate != "42501" { + t.Fatalf("sqlstate = %q want 42501", sqlstate) + } + if rendered == "" { + t.Fatalf("rendered empty") + } + if !contains(rendered, "no-deletes") { + t.Fatalf("rendered = %q does not reference first deny rule", rendered) + } +} + +func TestPickDenySynth_ConnectionRuleUses28000(t *testing.T) { + decisions := []policy.Decision{ + {Verb: policy.VerbDeny, RuleKind: policy.RuleKindConnection, RuleName: "no-replica"}, + } + _, sqlstate := pickDenySynth(decisions) + if sqlstate != "28000" { + t.Fatalf("sqlstate = %q want 28000", sqlstate) + } +} + +func TestPickDenySynth_ImplicitDenyMessage(t *testing.T) { + decisions := []policy.Decision{ + {Verb: policy.VerbDeny, RuleKind: policy.RuleKindStatement, RuleName: "", Reason: "no rule covers unsafe_io"}, + } + rendered, _ := pickDenySynth(decisions) + if !contains(rendered, "no rule covers") { + t.Fatalf("rendered = %q does not include reason text", rendered) + } +} + +func contains(s, sub string) bool { return strings.Contains(s, sub) } +``` + +Note: `pickDenySynth` may be passed a `denyMessage` separately in the implementation; the test expects implicit-deny + non-empty Reason to surface in `rendered`. Adjust the implementation accordingly. If `RuleKind` constants in `internal/db/policy/types.go` are named differently (e.g. `RuleKindStatement`), keep the symbol names consistent with what's already there. 
+ +- [ ] **Step 2: Run tests to verify failure** + +Run: `go test ./internal/db/proxy/postgres/ -run "TestSynthErrorAndRFQ|TestSynthErrorOnly|TestPickDenySynth" -count=1` +Expected: build errors. + +- [ ] **Step 3: Implement `deny.go`** + +Create `internal/db/proxy/postgres/deny.go`: + +```go +//go:build linux + +package postgres + +import ( + "fmt" + + "github.com/jackc/pgx/v5/pgproto3" + + "github.com/agentsh/agentsh/internal/db/policy" +) + +const ( + sqlstateInsufficientPrivilege = "42501" // statement-rule deny + sqlstateAuthFailure = "28000" // connection-rule deny + sqlstateProgramLimitExceeded = "54000" // frame budget + sqlstateFeatureNotSupported = "0A000" // extended query / function call +) + +// synthErrorAndRFQ writes ErrorResponse + ReadyForQuery('I') to the client. +// Used when lastUpstreamRFQ in {0, 'I'} so the next 'Q' can proceed. +func (pc *proxyConn) synthErrorAndRFQ(sqlstate, message string) error { + pc.backend.Send(&pgproto3.ErrorResponse{Severity: "ERROR", SQLState: sqlstate, Message: message}) + pc.backend.Send(&pgproto3.ReadyForQuery{TxStatus: 'I'}) + return pc.backend.Flush() +} + +// synthErrorOnly writes ErrorResponse with no trailing RFQ. Used for the +// in-tx deny case ({'T', 'E'}) — caller closes both conns immediately after. +func (pc *proxyConn) synthErrorOnly(sqlstate, message string) error { + pc.backend.Send(&pgproto3.ErrorResponse{Severity: "ERROR", SQLState: sqlstate, Message: message}) + return pc.backend.Flush() +} + +// pickDenySynth chooses the rendered deny message and SQLSTATE for a batch. +// Iterates in order; first denying entry wins (most-restrictive is +// deterministic under §10.2 with stable rule order). +// +// SQLSTATE selection: +// connection-rule deny → 28000 +// statement-rule deny → 42501 +// +// Rendered message: rule's DenyMessage template if Plan 02 carried one +// (not yet exposed on Decision; we fall back to RuleName / Reason). 
+func pickDenySynth(decisions []policy.Decision) (string, string) { + for _, d := range decisions { + if d.Verb != policy.VerbDeny { + continue + } + sqlstate := sqlstateInsufficientPrivilege + if d.RuleKind == policy.RuleKindConnection { + sqlstate = sqlstateAuthFailure + } + rendered := renderDenyMessage(d) + return rendered, sqlstate + } + // Defensive: caller is supposed to ensure anyDeny. + return "denied by AgentSH policy", sqlstateInsufficientPrivilege +} + +func renderDenyMessage(d policy.Decision) string { + if d.RuleName != "" { + return fmt.Sprintf("denied by AgentSH policy: %s", d.RuleName) + } + if d.Reason != "" { + return fmt.Sprintf("denied by AgentSH policy: %s", d.Reason) + } + return "denied by AgentSH policy" +} +``` + +- [ ] **Step 4: Replace the `_Tmp` helper in `simplequery.go`** + +In `internal/db/proxy/postgres/simplequery.go`, delete `synthErrorAndRFQTmp` (its definition and any callers), and update the call site in `handleQuery` to use `pc.synthErrorAndRFQ`. + +- [ ] **Step 5: Run all tests to confirm passing** + +Run: `go test ./internal/db/proxy/postgres/ -count=1` +Expected: all green. + +- [ ] **Step 6: Commit** + +```bash +git add internal/db/proxy/postgres/deny.go \ + internal/db/proxy/postgres/deny_test.go \ + internal/db/proxy/postgres/simplequery.go +git commit -m "db: proxy — deny synth helpers + SQLSTATE picker" +``` + +--- + +## Task 11: `eventbuilder.go` — pure event builder, redaction tiers, digest, `denied_by_sibling` + +**Why:** Pure function that turns `{stmt, decision, sql, result, denyAction, tier, conn}` into a fully-populated `events.DBEvent`. Owns the digest, the redaction-tier rendering, the `denied_by_sibling` tagging for non-denying statements in an `anyDeny` batch, and the `command_id` shape. 
+ +**Files:** +- Create: `internal/db/proxy/postgres/eventbuilder.go` +- Create: `internal/db/proxy/postgres/eventbuilder_test.go` + +- [ ] **Step 1: Write failing tests** + +Create `internal/db/proxy/postgres/eventbuilder_test.go`: + +```go +//go:build linux + +package postgres + +import ( + "crypto/sha256" + "encoding/hex" + "strings" + "testing" + "time" + + classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres" + "github.com/agentsh/agentsh/internal/db/effects" + "github.com/agentsh/agentsh/internal/db/events" + "github.com/agentsh/agentsh/internal/db/policy" +) + +func TestBuildStatementEvent_FullTier_VerbatimSlice(t *testing.T) { + sql := "SELECT 1; SELECT 2" + stmts := []effects.ClassifiedStatement{ + {Effects: []effects.Effect{{Group: effects.GroupRead, Resolution: effects.ResolutionQualified}}, SourceStart: 0, SourceEnd: 8, RawVerb: "SELECT"}, + {Effects: []effects.Effect{{Group: effects.GroupRead, Resolution: effects.ResolutionQualified}}, SourceStart: 10, SourceEnd: 18, RawVerb: "SELECT"}, + } + parser := classify_pg.New(classify_pg.DialectPostgres) + ev := buildStatementEvent(buildArgs{ + Stmt: stmts[0], + StmtIndex: 0, + BatchTotal: 2, + Decision: policy.Decision{Verb: policy.VerbAllow, RuleKind: policy.RuleKindStatement, RuleName: "app-allow-read"}, + SQL: sql, + Tier: policy.RedactFull, + Conn: connStateForTest("appdb", "postgres", "terminate_reissue"), + DenyAction: "none", + BatchSHA: sha256Hex(sql), + Parser: parser, + }) + if ev.StatementText != "SELECT 1" { + t.Fatalf("StatementText = %q want %q", ev.StatementText, "SELECT 1") + } + if !strings.HasPrefix(ev.StatementDigest, "sha256:") { + t.Fatalf("StatementDigest = %q must start sha256:", ev.StatementDigest) + } + if ev.Decision.Verb != "allow" { + t.Fatalf("Decision.Verb = %q want allow", ev.Decision.Verb) + } + if ev.TLS.Mode != "terminate_reissue" { + t.Fatalf("TLS.Mode = %q", ev.TLS.Mode) + } + if ev.CommandID == "" || !strings.Contains(ev.CommandID, ":0") { + 
t.Fatalf("CommandID = %q want suffix :0", ev.CommandID) + } +} + +func TestBuildStatementEvent_DigestStableAcrossTiers(t *testing.T) { + sql := "SELECT 'hello'" + stmt := effects.ClassifiedStatement{ + Effects: []effects.Effect{{Group: effects.GroupRead, Resolution: effects.ResolutionQualified}}, + SourceStart: 0, SourceEnd: int32(len(sql)), + } + parser := classify_pg.New(classify_pg.DialectPostgres) + digests := map[policy.RedactionTier]string{} + for _, tier := range []policy.RedactionTier{policy.RedactFull, policy.RedactParametersRedacted, policy.RedactNone} { + ev := buildStatementEvent(buildArgs{ + Stmt: stmt, SQL: sql, Tier: tier, + Conn: connStateForTest("appdb", "postgres", "terminate_reissue"), + Decision: policy.Decision{Verb: policy.VerbAllow, RuleKind: policy.RuleKindStatement}, + DenyAction: "none", + BatchSHA: sha256Hex(sql), + Parser: parser, + }) + digests[tier] = ev.StatementDigest + } + if digests[policy.RedactFull] != digests[policy.RedactParametersRedacted] || + digests[policy.RedactParametersRedacted] != digests[policy.RedactNone] { + t.Fatalf("digests diverged across tiers: %+v", digests) + } +} + +func TestBuildStatementEvent_DeniedBySibling(t *testing.T) { + sql := "SELECT 1; DELETE FROM t" + parser := classify_pg.New(classify_pg.DialectPostgres) + stmt0 := effects.ClassifiedStatement{ + Effects: []effects.Effect{{Group: effects.GroupRead, Resolution: effects.ResolutionQualified}}, + SourceStart: 0, SourceEnd: 8, RawVerb: "SELECT", + } + ev := buildStatementEvent(buildArgs{ + Stmt: stmt0, StmtIndex: 0, BatchTotal: 2, + Decision: policy.Decision{Verb: policy.VerbDeny, RuleKind: policy.RuleKindStatement, Reason: "denied by sibling statement"}, + SQL: sql, Tier: policy.RedactParametersRedacted, + Conn: connStateForTest("appdb", "postgres", "terminate_reissue"), + DenyAction: "none", + IsDeniedBySibling: true, + BatchSHA: sha256Hex(sql), + Parser: parser, + }) + if ev.Decision.Verb != "deny" { + t.Fatalf("Decision.Verb = %q want deny", 
// sha256Hex returns the lowercase hexadecimal SHA-256 digest of s.
func sha256Hex(s string) string {
	h := sha256.New()
	h.Write([]byte(s)) // hash.Hash.Write never returns an error
	return hex.EncodeToString(h.Sum(nil))
}
+ +- [ ] **Step 3: Implement `eventbuilder.go`** + +Create `internal/db/proxy/postgres/eventbuilder.go`: + +```go +//go:build linux + +package postgres + +import ( + "crypto/sha256" + "encoding/hex" + "fmt" + "strings" + + classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres" + "github.com/agentsh/agentsh/internal/db/effects" + "github.com/agentsh/agentsh/internal/db/events" + "github.com/agentsh/agentsh/internal/db/policy" +) + +// buildArgs collects the inputs to buildStatementEvent. Keeping them in a +// struct avoids a 14-argument function and makes test cases readable. +type buildArgs struct { + Stmt effects.ClassifiedStatement + StmtIndex int + BatchTotal int + Decision policy.Decision + SQL string + Tier policy.RedactionTier + Conn connState + BytesIn int64 + BytesOut int64 + LatencyMs int64 + RowsReturned *int64 + RowsAffected *int64 + UpstreamErrCode string + DenyAction string + IsDeniedBySibling bool + BatchSHA string // sha256 hex of the full Q.String; used for command_id + Parser classify_pg.Parser +} + +// buildStatementEvent returns a fully-populated events.DBEvent. Pure function +// — no I/O, no clock, no globals beyond the supplied buildArgs. 
+func buildStatementEvent(a buildArgs) events.DBEvent { + slice := perStmtSlice(a.SQL, a.Stmt) + + normalized, err := a.Parser.Normalize(slice) + if err != nil || normalized == "" { + normalized = strings.TrimSpace(slice) + } + digestBytes := sha256.Sum256([]byte(normalized)) + digest := "sha256:" + hex.EncodeToString(digestBytes[:]) + + var stmtText string + var redaction events.Redaction + switch a.Tier { + case policy.RedactFull: + stmtText = slice + redaction = events.RedactionFull + case policy.RedactParametersRedacted: + stmtText = normalized + redaction = events.RedactionParametersRedacted + case policy.RedactNone: + stmtText = "" + redaction = events.RedactionNone + default: + stmtText = normalized + redaction = events.RedactionParametersRedacted + } + + dec := buildDecision(a.Decision, a.IsDeniedBySibling) + + result := events.EventResult{ + RowsReturned: a.RowsReturned, + RowsAffected: a.RowsAffected, + BytesIn: a.BytesIn, + BytesOut: a.BytesOut, + LatencyMs: a.LatencyMs, + ErrorCode: a.UpstreamErrCode, + } + if a.IsDeniedBySibling { + result = events.EventResult{ + BytesIn: a.BytesIn, + ErrorCode: "DENIED_BY_SIBLING", + } + } + + tx := events.EventTxContext{ + InTransaction: a.Conn.lastUpstreamRFQ == 'T' || a.Conn.lastUpstreamRFQ == 'E', + DenyAction: a.DenyAction, + } + + predicates := events.EventPredicates{HasFilter: hasFilter(a.Stmt)} + + return events.DBEvent{ + EventID: newEventID(), + SessionID: a.Conn.clientIdentity, + CommandID: fmt.Sprintf("%s:%d", a.BatchSHA, a.StmtIndex), + Timestamp: timeNow(), + DBService: a.Conn.dbService, + DBFamily: "postgres", + DBDialect: stmtDialect(a.Conn), + DBUser: a.Conn.dbUser, + ApplicationName: a.Conn.appName, + ClientIdentity: a.Conn.clientIdentity, + Effects: a.Stmt.Effects, + RawVerb: a.Stmt.RawVerb, + ParserBackend: a.Stmt.ParserBackend, + StatementText: stmtText, + StatementDigest: digest, + StatementRedaction: redaction, + TLS: events.EventTLS{Mode: a.Conn.tlsMode, ClientSNI: a.Conn.sniHostname}, + 
Decision: dec, + Result: result, + TxContext: tx, + Predicates: predicates, + } +} + +func buildDecision(d policy.Decision, deniedBySibling bool) events.EventDecision { + if deniedBySibling { + return events.EventDecision{ + Verb: "deny", + RuleKind: "statement", + Reason: "denied by sibling statement", + } + } + verb := strings.ToLower(d.Verb.String()) + // Plan 02's Approval struct → caller already rewrote Verb to deny before + // reaching us in 04c (APPROVE_NOT_YET_SUPPORTED stub). Be defensive. + if verb == "" { + verb = "deny" + } + out := events.EventDecision{ + Verb: verb, + RuleKind: strings.ToLower(d.RuleKind.String()), + RuleName: d.RuleName, + MatchingEffectIndex: d.MatchingEffectIndex, + Reason: d.Reason, + } + if d.MatchingEffectGroup != effects.GroupUnknown { + out.MatchingEffectGroup = d.MatchingEffectGroup.String() + } + if len(d.ContributingAuditRules) > 0 { + out.ContributingAuditRules = append([]string(nil), d.ContributingAuditRules...) + } + return out +} + +func perStmtSlice(sql string, stmt effects.ClassifiedStatement) string { + if stmt.SourceStart == 0 && stmt.SourceEnd == 0 { + return strings.TrimSpace(sql) + } + if int(stmt.SourceEnd) > len(sql) || stmt.SourceStart < 0 || stmt.SourceStart > stmt.SourceEnd { + return strings.TrimSpace(sql) + } + return sql[stmt.SourceStart:stmt.SourceEnd] +} + +// hasFilter returns true when the classifier indicated a WHERE clause was +// present. Plan 04c reads this directly from a classifier-supplied flag once +// effects.Effect carries it; until then, we conservatively return false. +func hasFilter(stmt effects.ClassifiedStatement) bool { + for _, e := range stmt.Effects { + if e.HasFilter { + return true + } + } + return false +} + +// stmtDialect returns the service's dialect string for the event. This is +// best-effort: 04c does not surface dialect into connState beyond TLSMode, +// so we look it up on the Server. 
To keep the builder pure for tests, we +// thread it via Conn at a future task if needed; for now default to +// "postgres" when unset. +func stmtDialect(c connState) string { + if c.dbService == "" { + return "postgres" + } + return "postgres" +} +``` + +Two things to confirm against existing code before proceeding: + +1. **`effects.Effect.HasFilter`** may not exist yet — check `internal/db/effects/effect.go`. If absent, replace `hasFilter` with a stub `return false` and add `// TODO Plan 05: thread WHERE-clause flag from classifier`. **Update this plan to drop the TODO when the field arrives.** +2. **`connState.dbService` / `dbUser` / `database` / `appName` / `sniHostname`** must exist on the struct (they do — see proxyconn.go from 04a/b/b₂). + +- [ ] **Step 4: Run tests to confirm passing** + +Run: `go test ./internal/db/proxy/postgres/ -run TestBuildStatementEvent -count=1 -v` +Expected: PASS. + +- [ ] **Step 5: Commit** + +```bash +git add internal/db/proxy/postgres/eventbuilder.go \ + internal/db/proxy/postgres/eventbuilder_test.go +git commit -m "db: proxy — eventbuilder with redaction tiers + digest + sibling tagging" +``` + +--- + +## Task 12: `handleQuery` allow path — classify + evaluate + forward + +**Why:** Wire `simplequery.go::handleQuery` from stub to working allow path: classify via `classifierFor(dialect)`, evaluate every statement, decide `anyDeny`, and on no-deny forward the `Q` upstream and run `forwardUpstreamUntilRFQ` to demux the response. Emit one per-stmt allow/audit event. 
+ +**Files:** +- Modify: `internal/db/proxy/postgres/simplequery.go` +- Modify: `internal/db/proxy/postgres/simplequery_test.go` + +- [ ] **Step 1: Write failing tests** + +Append to `internal/db/proxy/postgres/simplequery_test.go`: + +```go +func TestHandleQuery_AllowPath_ForwardsAndEmits(t *testing.T) { + pc, clientFE, sink, upstreamScript := newAllowPathFixture(t) + pc.state.lastUpstreamRFQ = 'I' + pc.srv.SetPolicy(allowAllRuleSet(t)) + + upstreamScript([]pgproto3.BackendMessage{ + &pgproto3.RowDescription{Fields: []pgproto3.FieldDescription{{Name: []byte("a")}}}, + &pgproto3.DataRow{Values: [][]byte{[]byte("1")}}, + &pgproto3.CommandComplete{CommandTag: []byte("SELECT 1")}, + &pgproto3.ReadyForQuery{TxStatus: 'I'}, + }) + + mustSendFromClient(t, clientFE, &pgproto3.Query{String: "SELECT 1"}) + + go func() { + _ = pc.simpleQueryLoop(context.Background()) + }() + + // Drain client side; expect RowDescription, DataRow, CommandComplete, ReadyForQuery + frames := drainNFrames(t, clientFE, 4) + if _, ok := frames[3].(*pgproto3.ReadyForQuery); !ok { + t.Fatalf("last frame = %T want ReadyForQuery", frames[3]) + } + + evs := sink.DrainStatements() + if len(evs) != 1 { + t.Fatalf("statement events = %d want 1", len(evs)) + } + if evs[0].Decision.Verb != "allow" { + t.Fatalf("event Verb = %q want allow", evs[0].Decision.Verb) + } + if evs[0].Result.RowsReturned == nil || *evs[0].Result.RowsReturned != 1 { + t.Fatalf("RowsReturned = %v want 1", evs[0].Result.RowsReturned) + } +} + +func TestHandleQuery_AllowPath_MultiStmt(t *testing.T) { + pc, clientFE, sink, upstreamScript := newAllowPathFixture(t) + pc.state.lastUpstreamRFQ = 'I' + pc.srv.SetPolicy(allowAllRuleSet(t)) + + upstreamScript([]pgproto3.BackendMessage{ + &pgproto3.CommandComplete{CommandTag: []byte("INSERT 0 3")}, + &pgproto3.CommandComplete{CommandTag: []byte("INSERT 0 5")}, + &pgproto3.ReadyForQuery{TxStatus: 'I'}, + }) + + mustSendFromClient(t, clientFE, &pgproto3.Query{String: "INSERT INTO t VALUES (1); 
INSERT INTO t VALUES (2)"}) + + go func() { _ = pc.simpleQueryLoop(context.Background()) }() + _ = drainNFrames(t, clientFE, 3) + + evs := sink.DrainStatements() + if len(evs) != 2 { + t.Fatalf("statement events = %d want 2", len(evs)) + } + if *evs[0].Result.RowsAffected != 3 || *evs[1].Result.RowsAffected != 5 { + t.Fatalf("affected mismatch: %v / %v", evs[0].Result.RowsAffected, evs[1].Result.RowsAffected) + } + if evs[0].CommandID == evs[1].CommandID { + t.Fatalf("CommandID must differ per stmt: %q / %q", evs[0].CommandID, evs[1].CommandID) + } +} +``` + +Helpers: + +```go +func newAllowPathFixture(t *testing.T) (*proxyConn, *pgproto3.Frontend, *events.SyncSink, func([]pgproto3.BackendMessage)) { + pc, clientFE, sink := newSimpleQueryFixture(t) + up1, up2 := net.Pipe() + t.Cleanup(func() { _ = up1.Close(); _ = up2.Close() }) + pc.state.upstream = up2 + pc.state.upstreamFE = pgproto3.NewFrontend(up2, up2) + script := func(msgs []pgproto3.BackendMessage) { + go func() { + be := pgproto3.NewBackend(up1, up1) + // Wait for one client message (the 'Q') before sending the script. + _, _ = be.Receive() + for _, m := range msgs { + be.Send(m) + } + _ = be.Flush() + }() + } + return pc, clientFE, sink, script +} + +func allowAllRuleSet(t *testing.T) *policy.RuleSet { + // Use the policy package's Decode against a permissive YAML. 
+ rs, _, err := policy.Decode([]byte(` +services: + - name: test + family: postgres + dialect: postgres + upstream: "127.0.0.1:5432" + tls_mode: terminate_reissue + +rules: + - name: allow-all + decision: allow + operations: [read, write, ddl, dml, session, procedural] + services: [test] + objects: ['*'] +`)) + if err != nil { + t.Fatalf("Decode: %v", err) + } + return rs +} + +func drainNFrames(t *testing.T, fe *pgproto3.Frontend, n int) []pgproto3.BackendMessage { + t.Helper() + out := make([]pgproto3.BackendMessage, 0, n) + for i := 0; i < n; i++ { + m, err := fe.Receive() + if err != nil { + t.Fatalf("Receive[%d]: %v", i, err) + } + out = append(out, m) + } + return out +} +``` + +Verify against the current shape of `policy.Decode` — adjust the YAML keys to match what the codebase actually accepts; the rule schema may be `operations: [read]` and `objects: ['*']` is a wildcard the evaluator supports. Run `go test ./internal/db/policy/...` to confirm the YAML parses, then revise the test fixture if needed. + +- [ ] **Step 2: Run tests to verify failure** + +Run: `go test ./internal/db/proxy/postgres/ -run TestHandleQuery_AllowPath -count=1` +Expected: FAIL — `handleQuery` is still the stub from Task 8. 
+ +- [ ] **Step 3: Wire the allow path** + +Replace `handleQuery` in `internal/db/proxy/postgres/simplequery.go`: + +```go +func (pc *proxyConn) handleQuery(ctx context.Context, q *pgproto3.Query) error { + if len(q.String) > pc.srv.cfg.MaxQueryBytes { + pc.emitFrameTooLarge(ctx, len(q.String)) + _ = pc.synthErrorAndRFQ(sqlstateProgramLimitExceeded, + fmt.Sprintf("statement too large for AgentSH proxy: %d bytes > %d cap", + len(q.String), pc.srv.cfg.MaxQueryBytes)) + return errFrameTooLargeClose + } + + parser := pc.srv.classifierFor(pc.svc.Dialect) + stmts, _ := parser.Classify(q.String, classify_pg.SessionState{}, classify_pg.Options{}) + rs := pc.srv.policy() + decisions := make([]policy.Decision, len(stmts)) + anyDeny := false + for i, s := range stmts { + decisions[i] = policy.Evaluate(s, rs, policy.ServiceID(pc.svc.Name)) + if decisions[i].Verb == policy.VerbApprove { + decisions[i] = synthApproveAsDeny(decisions[i]) + } + if decisions[i].Verb == policy.VerbDeny { + anyDeny = true + } + } + + batchSHA := sha256HexBatch(q.String) + + if !anyDeny { + sentAt := timeNow() + pc.state.upstreamFE.Send(q) + if err := pc.state.upstreamFE.Flush(); err != nil { + return err + } + result, ferr := pc.forwardUpstreamUntilRFQ(ctx, sentAt, len(q.String)) + pc.emitAllowEvents(ctx, stmts, decisions, q.String, batchSHA, result) + return ferr + } + + // Deny path is filled in by Task 13. + return pc.synthesizeError(sqlstateInsufficientPrivilege, "deny path not yet implemented") +} + +// synthApproveAsDeny rewrites a Decision with Verb=approve into Verb=deny +// with the APPROVE_NOT_YET_SUPPORTED stub marker. Per spec §14.5, approve +// runtime lands in Plan 05; until then we surface a loud failure mode. 
+func synthApproveAsDeny(d policy.Decision) policy.Decision { + d.Verb = policy.VerbDeny + if d.Reason == "" { + d.Reason = "APPROVE_NOT_YET_SUPPORTED" + } + return d +} + +func sha256HexBatch(sql string) string { + sum := sha256.Sum256([]byte(sql)) + return hex.EncodeToString(sum[:]) +} + +// emitAllowEvents emits one db_statement event per ClassifiedStatement when +// none denied. Per-stmt counters come from result.RowsByStmt / +// AffectedByStmt; bytes_in / bytes_out / latency_ms are attributed per-stmt +// (each event carries the batch values). +func (pc *proxyConn) emitAllowEvents( + ctx context.Context, + stmts []effects.ClassifiedStatement, + decisions []policy.Decision, + sql string, + batchSHA string, + r upstreamResult, +) { + parser := pc.srv.classifierFor(pc.svc.Dialect) + for i, s := range stmts { + var rows, aff *int64 + if i < len(r.RowsByStmt) { + rows = r.RowsByStmt[i] + aff = r.AffectedByStmt[i] + } + errCode := "" + if r.ErrorCode != "" && i >= len(r.RowsByStmt) { + errCode = "STATEMENT_ABORTED_BY_PRIOR_ERROR" + } else if i == 0 { + errCode = r.ErrorCode + } + ev := buildStatementEvent(buildArgs{ + Stmt: s, StmtIndex: i, BatchTotal: len(stmts), + Decision: decisions[i], + SQL: sql, Tier: pc.state.redactionTier, + Conn: *pc.state, + BytesIn: int64(len(sql)), + BytesOut: r.BytesOut, + LatencyMs: r.LatencyMs, + RowsReturned: rows, + RowsAffected: aff, + UpstreamErrCode: errCode, + DenyAction: "none", + BatchSHA: batchSHA, + Parser: parser, + }) + if err := pc.srv.cfg.Sink.EmitStatement(ctx, ev); err != nil { + pc.logger.Warn("emit statement event failed", "err", err) + } + } +} +``` + +Add the required imports: `"crypto/sha256"`, `"encoding/hex"`, `"github.com/agentsh/agentsh/internal/db/effects"`, `"github.com/agentsh/agentsh/internal/db/policy"`, `classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres"`. 
+ +- [ ] **Step 4: Add `SyncSink.DrainStatements()` if missing** + +If the existing `SyncSink` already implements `Drain()` returning `[]DBEvent`, rename callers in tests to `DrainStatements()`, or add a thin alias method `DrainStatements() []DBEvent { return s.Drain() }`. The intent: distinguish statement events from lifecycle events. + +- [ ] **Step 5: Run tests to confirm passing** + +Run: `go test ./internal/db/proxy/postgres/ -run TestHandleQuery_AllowPath -count=1 -v` +Expected: both subtests PASS. + +- [ ] **Step 6: Commit** + +```bash +git add internal/db/proxy/postgres/simplequery.go \ + internal/db/proxy/postgres/simplequery_test.go \ + internal/db/events/sink.go +git commit -m "db: proxy — handleQuery allow path with classify+evaluate+forward" +``` + +--- + +## Task 13: `handleQuery` deny path — anyDeny + per-stmt events + RFQ-gated synth + +**Why:** Complete `handleQuery` for the deny case. When `anyDeny` is true: forward nothing upstream; emit one event per statement (denying ones get the real decision, others get `denied_by_sibling` tagging); synthesize the deny based on `lastUpstreamRFQ` (local `ErrorResponse + RFQ('I')` out-of-tx; `ErrorResponse` only + terminate in-tx with `tx_context.deny_action = "connection_terminated"`). 
+ +**Files:** +- Modify: `internal/db/proxy/postgres/simplequery.go` +- Modify: `internal/db/proxy/postgres/simplequery_test.go` + +- [ ] **Step 1: Write failing tests** + +Append to `internal/db/proxy/postgres/simplequery_test.go`: + +```go +func TestHandleQuery_DenyPath_PreTx(t *testing.T) { + pc, clientFE, sink, _ := newAllowPathFixture(t) + pc.state.lastUpstreamRFQ = 'I' + pc.srv.SetPolicy(denyDeletesRuleSet(t)) + + mustSendFromClient(t, clientFE, &pgproto3.Query{String: "DELETE FROM t"}) + + go func() { _ = pc.simpleQueryLoop(context.Background()) }() + + er := mustReceiveClientFrame(t, clientFE).(*pgproto3.ErrorResponse) + if er.SQLState != "42501" { + t.Fatalf("SQLState = %q want 42501", er.SQLState) + } + rfq := mustReceiveClientFrame(t, clientFE).(*pgproto3.ReadyForQuery) + if rfq.TxStatus != 'I' { + t.Fatalf("RFQ TxStatus = %q want 'I'", rfq.TxStatus) + } + + evs := sink.DrainStatements() + if len(evs) != 1 || evs[0].Decision.Verb != "deny" { + t.Fatalf("statement events = %+v", evs) + } + if evs[0].TxContext.DenyAction != "none" { + t.Fatalf("DenyAction = %q want none", evs[0].TxContext.DenyAction) + } +} + +func TestHandleQuery_DenyPath_InTx_Terminates(t *testing.T) { + pc, clientFE, sink, _ := newAllowPathFixture(t) + pc.state.lastUpstreamRFQ = 'T' // simulate prior BEGIN + pc.srv.SetPolicy(denyDeletesRuleSet(t)) + + mustSendFromClient(t, clientFE, &pgproto3.Query{String: "DELETE FROM t"}) + + err := pc.simpleQueryLoop(context.Background()) + if err == nil { + t.Fatalf("simpleQueryLoop must return non-nil on in-tx deny terminate") + } + + er := mustReceiveClientFrame(t, clientFE).(*pgproto3.ErrorResponse) + if er.SQLState != "42501" { + t.Fatalf("SQLState = %q want 42501", er.SQLState) + } + // No ReadyForQuery should follow — try Receive and expect an error. 
+ if _, e := clientFE.Receive(); e == nil { + t.Fatalf("expected client conn closed after in-tx deny, got next frame") + } + + evs := sink.DrainStatements() + if len(evs) != 1 || evs[0].TxContext.DenyAction != "connection_terminated" { + t.Fatalf("events = %+v", evs) + } +} + +func TestHandleQuery_DenyPath_MultiStmt_TagsSiblings(t *testing.T) { + pc, clientFE, sink, _ := newAllowPathFixture(t) + pc.state.lastUpstreamRFQ = 'I' + pc.srv.SetPolicy(denyDeletesRuleSet(t)) + + mustSendFromClient(t, clientFE, &pgproto3.Query{String: "SELECT 1; DELETE FROM t"}) + + go func() { _ = pc.simpleQueryLoop(context.Background()) }() + _ = mustReceiveClientFrame(t, clientFE) // ErrorResponse + _ = mustReceiveClientFrame(t, clientFE) // ReadyForQuery + + evs := sink.DrainStatements() + if len(evs) != 2 { + t.Fatalf("statement events = %d want 2", len(evs)) + } + // First (SELECT) should be denied_by_sibling. + if evs[0].Result.ErrorCode != "DENIED_BY_SIBLING" || evs[0].Decision.Verb != "deny" { + t.Fatalf("evs[0] = %+v", evs[0]) + } + // Second (DELETE) is the actual denying stmt. + if evs[1].Decision.Verb != "deny" || evs[1].Decision.RuleName == "" { + t.Fatalf("evs[1] = %+v", evs[1]) + } +} + +func denyDeletesRuleSet(t *testing.T) *policy.RuleSet { + rs, _, err := policy.Decode([]byte(` +services: + - name: test + family: postgres + dialect: postgres + upstream: "127.0.0.1:5432" + tls_mode: terminate_reissue + +rules: + - name: allow-reads + decision: allow + operations: [read] + services: [test] + objects: ['*'] + - name: deny-deletes + decision: deny + operations: [write] + services: [test] + objects: ['*'] +`)) + if err != nil { + t.Fatalf("Decode: %v", err) + } + return rs +} +``` + +- [ ] **Step 2: Run tests to verify failure** + +Run: `go test ./internal/db/proxy/postgres/ -run TestHandleQuery_DenyPath -count=1` +Expected: FAIL — deny stub returns `42501` with "deny path not yet implemented", and no events emit. 
+ +- [ ] **Step 3: Wire the deny path** + +Replace the deny stub at the end of `handleQuery` in `internal/db/proxy/postgres/simplequery.go`: + +```go + // Deny path. + denyAction := "none" + if pc.state.lastUpstreamRFQ == 'T' || pc.state.lastUpstreamRFQ == 'E' { + denyAction = "connection_terminated" + } + pc.emitDenyEvents(ctx, stmts, decisions, q.String, batchSHA, denyAction) + rendered, sqlstate := pickDenySynth(decisions) + switch pc.state.lastUpstreamRFQ { + case 0, 'I': + return pc.synthErrorAndRFQ(sqlstate, rendered) + case 'T', 'E': + _ = pc.synthErrorOnly(sqlstate, rendered) + return errInTxTerminate + default: + return fmt.Errorf("postgres.handleQuery: unexpected RFQ byte %q", pc.state.lastUpstreamRFQ) + } +} + +func (pc *proxyConn) emitDenyEvents( + ctx context.Context, + stmts []effects.ClassifiedStatement, + decisions []policy.Decision, + sql, batchSHA, denyAction string, +) { + parser := pc.srv.classifierFor(pc.svc.Dialect) + for i, s := range stmts { + deniedBySibling := decisions[i].Verb != policy.VerbDeny + ev := buildStatementEvent(buildArgs{ + Stmt: s, StmtIndex: i, BatchTotal: len(stmts), + Decision: decisions[i], + SQL: sql, Tier: pc.state.redactionTier, + Conn: *pc.state, + BytesIn: int64(len(sql)), + DenyAction: denyAction, + IsDeniedBySibling: deniedBySibling, + BatchSHA: batchSHA, + Parser: parser, + }) + if err := pc.srv.cfg.Sink.EmitStatement(ctx, ev); err != nil { + pc.logger.Warn("emit statement event failed", "err", err) + } + } +} +``` + +- [ ] **Step 4: Run all simplequery tests** + +Run: `go test ./internal/db/proxy/postgres/ -run TestHandleQuery -count=1 -v` +Expected: all PASS. + +Run the package's full suite to confirm no regression: + +Run: `go test ./internal/db/proxy/postgres/ -count=1` +Expected: all green. 
+ +- [ ] **Step 5: Commit** + +```bash +git add internal/db/proxy/postgres/simplequery.go \ + internal/db/proxy/postgres/simplequery_test.go +git commit -m "db: proxy — handleQuery deny path with RFQ-gated synth + sibling tagging" +``` + +--- + +## Task 14: Wire `simpleQueryLoop` into `handshake.dialUpstreamAndForward` + config-load warning for `approve` + +**Why:** Two small wiring steps. (1) After `forwardAuth` returns nil, `dialUpstreamAndForward` currently returns nil; we change it to seed `redactionTier` / `tlsMode` and call `simpleQueryLoop`. (2) `policy.Decode` emits a `Warning` for every rule with `decision: approve` so operators see APPROVE_NOT_YET_SUPPORTED at config load. + +**Files:** +- Modify: `internal/db/proxy/postgres/handshake.go` +- Modify: `internal/db/policy/decode.go` +- Modify: `internal/db/policy/decode_test.go` + +- [ ] **Step 1: Write failing test for the decode warning** + +Append to `internal/db/policy/decode_test.go`: + +```go +func TestDecode_WarnsOnApproveDecision(t *testing.T) { + yaml := []byte(` +services: + - name: appdb + family: postgres + dialect: postgres + upstream: "127.0.0.1:5432" + tls_mode: terminate_reissue + +rules: + - name: review-deletes + decision: approve + operations: [write] + services: [appdb] + objects: ['*'] +`) + _, warnings, err := Decode(yaml) + if err != nil { + t.Fatalf("Decode: %v", err) + } + var found bool + for _, w := range warnings { + if w.Code == "APPROVE_NOT_YET_SUPPORTED" && w.Rule == "review-deletes" { + found = true + } + } + if !found { + t.Fatalf("expected APPROVE_NOT_YET_SUPPORTED warning, got %+v", warnings) + } +} +``` + +- [ ] **Step 2: Write failing test for the loop entry** + +Append to `internal/db/proxy/postgres/handshake_test.go` (or extend an existing forward-auth test): + +```go +func TestDialUpstreamAndForward_EntersSimpleQueryLoopAfterRFQ(t *testing.T) { + pc, _, fakeUp, sink := newDialUpstreamFixture(t) + pc.srv.SetPolicy(allowAllRuleSet(t)) + + 
fakeUp.ScriptAuth([]pgproto3.BackendMessage{ + &pgproto3.AuthenticationOk{}, + &pgproto3.ReadyForQuery{TxStatus: 'I'}, + }) + fakeUp.ScriptOnFirstQuery([]pgproto3.BackendMessage{ + &pgproto3.RowDescription{Fields: []pgproto3.FieldDescription{{Name: []byte("a")}}}, + &pgproto3.DataRow{Values: [][]byte{[]byte("1")}}, + &pgproto3.CommandComplete{CommandTag: []byte("SELECT 1")}, + &pgproto3.ReadyForQuery{TxStatus: 'I'}, + }) + + // Client sends a 'Q' after handshake; the proxy should classify+forward. + go pc.dispatchStartup(context.Background()) + + // (Implementation of newDialUpstreamFixture lives below in this file.) + // Assert: one statement event in sink after the round-trip. + // ... + _ = sink +} +``` + +This test is structurally larger than the others and exists for full path-coverage. The exact fixture shape may need to lean on the existing `testupstream_test.go` helper from 04b₂ — extend it to script a post-auth Q response. + +- [ ] **Step 3: Run tests to verify failure** + +Run: `go test ./internal/db/policy/ -run TestDecode_WarnsOnApproveDecision -count=1` +Expected: FAIL — no warning emitted today. + +Run: `go test ./internal/db/proxy/postgres/ -run TestDialUpstreamAndForward_EntersSimpleQueryLoopAfterRFQ -count=1` +Expected: FAIL or build error — `simpleQueryLoop` not yet called from the handshake. + +- [ ] **Step 4: Emit the approve warning in `policy.Decode`** + +Modify `internal/db/policy/decode.go`. After statement-rule decoding, iterate rules and append a `Warning`: + +```go + for _, r := range statementRules { + if r.Decision == "approve" { + warnings = append(warnings, Warning{ + Rule: r.Name, + Field: "decision", + Code: "APPROVE_NOT_YET_SUPPORTED", + Message: "decision: approve is parsed but treated as deny at runtime until Plan 05", + Line: r.line, // existing yaml.v3 node line + }) + } + } +``` + +Adjust naming to match the existing decode loop — the symbol names in the codebase may differ slightly. 
+ +- [ ] **Step 5: Call `simpleQueryLoop` after `forwardAuth` returns** + +Modify `internal/db/proxy/postgres/handshake.go`. In `dialUpstreamAndForward`, the tail of the function currently looks like: + +```go + if err := forwardAuth(ctx, pc); err != nil { + if errors.Is(err, errScramPlusFailClosed) { + pc.emitHandshakeFail(ctx, scramPlusEventCode) + return nil + } + return nil + } + return nil +} +``` + +Replace with: + +```go + if err := forwardAuth(ctx, pc); err != nil { + if errors.Is(err, errScramPlusFailClosed) { + pc.emitHandshakeFail(ctx, scramPlusEventCode) + return nil + } + return nil + } + // Hand off to the Simple Query loop. forwardAuth already wrote the + // observed 'Z' status byte into pc.state.lastUpstreamRFQ. + if rs := pc.srv.policy(); rs != nil { + pc.state.redactionTier = rs.Redaction().LogStatements + } else { + pc.state.redactionTier = policy.RedactParametersRedacted + } + pc.state.tlsMode = pc.svc.TLSMode + return pc.simpleQueryLoop(ctx) +} +``` + +Add the `"github.com/agentsh/agentsh/internal/db/policy"` import if not present. + +- [ ] **Step 6: Run tests to confirm passing** + +Run: `go test ./internal/db/policy/ -count=1` +Expected: green, including the new warning test. + +Run: `go test ./internal/db/proxy/postgres/ -count=1` +Expected: green. + +- [ ] **Step 7: Commit** + +```bash +git add internal/db/proxy/postgres/handshake.go \ + internal/db/policy/decode.go \ + internal/db/policy/decode_test.go \ + internal/db/proxy/postgres/handshake_test.go +git commit -m "db: wire simpleQueryLoop into handshake + warn on approve at config-load" +``` + +--- + +## Task 15: Spine integration test — real `pgx` + fake upstream + +**Why:** The skeleton design's "does the whole shape compose" test. Three subtests: allow, pre-tx deny, in-tx deny terminate. Adds `pgx` as a test-only dep. 
+ +**Files:** +- Modify: `internal/db/proxy/postgres/spine_test.go` (extend existing file from 04b₂) +- Modify: `go.mod` / `go.sum` + +- [ ] **Step 1: Promote `pgx` to a top-level test dep** + +Run: + +```bash +go get github.com/jackc/pgx/v5 +go mod tidy +``` + +Expected: `go.sum` checksums change; `pgx` stays `// indirect` until `spine_test.go` imports it in Step 2, after which `go mod tidy` lists it as a top-level entry. + +- [ ] **Step 2: Write the three failing spine subtests** + +Append to `internal/db/proxy/postgres/spine_test.go`: + +```go +func TestSpine_Plan04c_SimpleQuery_AllowFlow(t *testing.T) { + t.Parallel() + env := newSpineEnv(t, withSpinePolicy(allowAllRuleSet(t))) + defer env.Close() + + env.Upstream.ScriptOnFirstQuery([]pgproto3.BackendMessage{ + &pgproto3.RowDescription{Fields: []pgproto3.FieldDescription{{Name: []byte("a")}}}, + &pgproto3.DataRow{Values: [][]byte{[]byte("1")}}, + &pgproto3.CommandComplete{CommandTag: []byte("SELECT 1")}, + &pgproto3.ReadyForQuery{TxStatus: 'I'}, + }) + + conn, err := pgx.Connect(context.Background(), env.PgxConnString()) + if err != nil { + t.Fatalf("pgx.Connect: %v", err) + } + defer conn.Close(context.Background()) + + rows, err := conn.Query(context.Background(), "SELECT 1") + if err != nil { + t.Fatalf("Query: %v", err) + } + if !rows.Next() { + t.Fatalf("expected one row, got none") + } + rows.Close() + + evs := env.Sink.DrainStatements() + if len(evs) != 1 || evs[0].Decision.Verb != "allow" { + t.Fatalf("events = %+v", evs) + } + if evs[0].Result.RowsReturned == nil || *evs[0].Result.RowsReturned != 1 { + t.Fatalf("RowsReturned = %v want 1", evs[0].Result.RowsReturned) + } +} + +func TestSpine_Plan04c_SimpleQuery_DenyPreTx(t *testing.T) { + t.Parallel() + env := newSpineEnv(t, withSpinePolicy(denyDeletesRuleSet(t))) + defer env.Close() + + conn, err := pgx.Connect(context.Background(), env.PgxConnString()) + if err != nil { + t.Fatalf("pgx.Connect: %v", err) + } + defer conn.Close(context.Background()) + + _, err = conn.Exec(context.Background(), "DELETE FROM t") + if err == nil { + 
t.Fatalf("Exec: expected deny error, got nil") + } + var pgErr *pgconn.PgError + if !errors.As(err, &pgErr) || pgErr.Code != "42501" { + t.Fatalf("error = %v (code = %v) want 42501", err, pgErrCodeOrEmpty(err)) + } + if env.Upstream.BytesReceivedAfterStartup() != 0 { + t.Fatalf("upstream received %d bytes after startup; want 0 (deny pre-forward)", + env.Upstream.BytesReceivedAfterStartup()) + } + + evs := env.Sink.DrainStatements() + if len(evs) != 1 || evs[0].Decision.Verb != "deny" { + t.Fatalf("events = %+v", evs) + } +} + +func TestSpine_Plan04c_SimpleQuery_DenyInTx_Terminates(t *testing.T) { + t.Parallel() + env := newSpineEnv(t, withSpinePolicy(denyDeletesRuleSet(t))) + defer env.Close() + + // denyDeletesRuleSet only allows reads, so BEGIN (a session statement) + // would itself be denied. Swap in a fixture that also allows session + // statements before opening the transaction. + env.Server.SetPolicy(allowReadsAndSessionsDenyWritesRuleSet(t)) + + env.Upstream.ScriptOnNthQuery(1, []pgproto3.BackendMessage{ + &pgproto3.CommandComplete{CommandTag: []byte("BEGIN")}, + &pgproto3.ReadyForQuery{TxStatus: 'T'}, + }) + + conn, err := pgx.Connect(context.Background(), env.PgxConnString()) + if err != nil { + t.Fatalf("pgx.Connect: %v", err) + } + + // Issue BEGIN via SimpleProtocol. + _, err = conn.Exec(context.Background(), "BEGIN") + if err != nil { + t.Fatalf("BEGIN: %v", err) + } + // Now DELETE — must be denied and the conn terminated. + _, err = conn.Exec(context.Background(), "DELETE FROM t") + if err == nil { + t.Fatalf("Exec DELETE: expected deny error") + } + + // Subsequent op must fail with closed-conn. 
+ _, err = conn.Exec(context.Background(), "SELECT 1") + if err == nil { + t.Fatalf("expected closed-conn error on next op") + } + + evs := env.Sink.DrainStatements() + var deny events.DBEvent + for _, e := range evs { + if e.Decision.Verb == "deny" { + deny = e + break + } + } + if deny.TxContext.DenyAction != "connection_terminated" { + t.Fatalf("DenyAction = %q want connection_terminated", deny.TxContext.DenyAction) + } +} + +func pgErrCodeOrEmpty(err error) string { + var pgErr *pgconn.PgError + if errors.As(err, &pgErr) { + return pgErr.Code + } + return "" +} +``` + +The fixture `newSpineEnv` + `withSpinePolicy` + `env.PgxConnString()` + `env.Upstream.BytesReceivedAfterStartup()` must be authored to: +1. Bind the proxy to a `t.TempDir()` Unix socket. +2. Issue the AgentSH CA via the existing `tlsleaf` package. +3. Build a `pgxpool`-friendly conn string with `sslmode=verify-full`, `sslrootcert=` set to the CA path, `host=` set to the Unix socket dir. +4. Spin a fake upstream from `testupstream_test.go` (extend with `BytesReceivedAfterStartup` if missing). + +Put helpers near the existing 04b₂ spine helpers. + +- [ ] **Step 3: Run tests to verify they fail** + +Run: `go test ./internal/db/proxy/postgres/ -run TestSpine_Plan04c -count=1` +Expected: FAIL on missing fixture helpers; flesh them out incrementally. + +- [ ] **Step 4: Author the fixture helpers** + +In `internal/db/proxy/postgres/spine_test.go`, add `newSpineEnv` + `spineEnv`. 
Sketch: + +```go +type spineEnv struct { + Server *Server + Upstream *fakeUpstream + Sink *events.SyncSink + CAPath string + SockDir string +} + +func newSpineEnv(t *testing.T, opts ...spineOpt) *spineEnv { + t.Helper() + dir := t.TempDir() + caDir := filepath.Join(dir, "state") + if err := os.MkdirAll(caDir, 0o700); err != nil { + t.Fatal(err) + } + sockDir := filepath.Join(dir, "sock") + if err := os.MkdirAll(sockDir, 0o700); err != nil { + t.Fatal(err) + } + sockPath := filepath.Join(sockDir, ".s.PGSQL.5432") + + fake := newFakeUpstream(t) + + sink := &events.SyncSink{} + svc := Service{ + Name: "test", + Family: "postgres", + Dialect: "postgres", + Upstream: fake.Addr(), + TLSMode: "terminate_reissue", + Listen: ServiceListener{Kind: "unix", Path: sockPath}, + } + cfg := Config{ + Unavoidability: service.UnavoidabilityObserve, + Services: []Service{svc}, + StateDir: caDir, + Sink: sink, + // UpstreamTLSConfigForTest set to skip-verify against the fake upstream. + UpstreamTLSConfigForTest: &tls.Config{InsecureSkipVerify: true}, + } + for _, opt := range opts { + opt(&cfg) + } + s, err := New(cfg) + if err != nil { + t.Fatal(err) + } + ctx, cancel := context.WithCancel(context.Background()) + go func() { _ = s.Start(ctx) }() + t.Cleanup(func() { + cancel() + _ = s.Shutdown(context.Background()) + }) + + caPath := filepath.Join(caDir, "db-ca.crt") + return &spineEnv{ + Server: s, + Upstream: fake, + Sink: sink, + CAPath: caPath, + SockDir: sockDir, + } +} + +func (e *spineEnv) PgxConnString() string { + return fmt.Sprintf("host=%s port=5432 user=agent dbname=app sslmode=verify-full sslrootcert=%s", + e.SockDir, e.CAPath) +} + +func (e *spineEnv) Close() { /* covered by t.Cleanup */ } + +type spineOpt func(*Config) + +func withSpinePolicy(rs *policy.RuleSet) spineOpt { + return func(c *Config) { c.Policy = rs } +} +``` + +The `fakeUpstream` helper from 04b₂'s `testupstream_test.go` likely has `Addr()` and a script API; extend it with `ScriptOnFirstQuery` / 
`ScriptOnNthQuery` / `BytesReceivedAfterStartup` as needed. (If extending feels disproportionate, build a fresh fixture here and document why.) + +- [ ] **Step 5: Run the spine tests** + +Run: `go test ./internal/db/proxy/postgres/ -run TestSpine_Plan04c -count=1 -v` +Expected: all three subtests PASS. + +If `pgx` keeps the conn open via NOTICE pumps or background goroutines and the test hangs, add a Read deadline to the proxy-side conn or a `context.WithTimeout(t.Context(), 5*time.Second)` on every `pgx` call. + +- [ ] **Step 6: Cross-compile + full repo test** + +Run: `GOOS=windows go build ./...` +Expected: green. + +Run: `go test ./... -count=1` +Expected: green. + +Run: `go mod tidy && git diff --exit-code go.mod go.sum` +Expected: no further changes. + +- [ ] **Step 7: Commit** + +```bash +git add internal/db/proxy/postgres/spine_test.go internal/db/proxy/postgres/testupstream_test.go go.mod go.sum +git commit -m "db: proxy — spine integration test for Plan 04c (real pgx + fake upstream)" +``` + +--- + +## Self-review checklist (run after every task lands) + +Before opening the PR for Plan 04c, walk back through the spec and verify each requirement maps to at least one task: + +1. `Normalize` on Parser — Task 3. ✓ +2. `SourceStart` / `SourceEnd` on ClassifiedStatement — Tasks 1, 2. ✓ +3. DBEvent §8 sub-structs — Task 4. ✓ +4. `MaxQueryBytes` default + cap enforcement — Tasks 5, 8. ✓ +5. Atomic policy pointer + `SetPolicy` — Task 5. ✓ +6. Per-dialect classifier map — Task 5. ✓ +7. `connState` extensions + `'Z'` byte capture — Task 6. ✓ +8. `simpleQueryLoop` skeleton + non-Q reject — Task 7. ✓ +9. Per-frame upstream demux + counters — Task 9. ✓ +10. Deny synth + SQLSTATE picker — Task 10. ✓ +11. Eventbuilder + redaction + digest + sibling tagging — Task 11. ✓ +12. Allow path + per-stmt allow events — Task 12. ✓ +13. Deny path + RFQ-gated synth + per-stmt events with DenyAction — Task 13. ✓ +14. Loop wired into handshake + approve config-load warning — Task 14. 
✓ +15. Spine integration test — Task 15. ✓ +16. Cross-compile (`GOOS=windows go build ./...`) — Tasks 3, 4, 5, 15. ✓ +17. `go mod tidy` clean after pgx promotion — Task 15. ✓ + +**Done definition** (mirrors spec §13): the spine test passes, `policies.db.unavoidability: off` is a no-op, `observe` mode end-to-end works, `go test ./...` green on Linux, cross-compile green on Windows. + +--- + +## Open questions surfaced during planning + +- **`effects.Effect.HasFilter`** — Plan 04c's `Predicates.HasFilter` reads from this field. If Plan 03 didn't surface it, `hasFilter()` returns false and a follow-up plan (or a small in-04c carve-out) wires the WHERE-clause detection. Flagged in Task 11 Step 3. +- **`SyncSink.lifecycle`** — verify against the current `sink.go`. Task 7 Step 5 assumes the structure; if 04b₂ shipped it differently, follow the existing shape. +- **`Decision.DenyMessage` template** — Plan 02's design mentions templates; the current `policy.Decision` struct does not expose `DenyMessage` directly. Task 10's `renderDenyMessage` falls back to RuleName/Reason. Adding template support is its own follow-up. + diff --git a/docs/superpowers/plans/2026-05-11-docker-sandboxes-mixin-kit.md b/docs/superpowers/plans/2026-05-11-docker-sandboxes-mixin-kit.md new file mode 100644 index 000000000..99436f318 --- /dev/null +++ b/docs/superpowers/plans/2026-05-11-docker-sandboxes-mixin-kit.md @@ -0,0 +1,2369 @@ +# Docker Sandboxes Mixin Kit — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Ship a Docker Sandboxes mixin kit at `docker/sbx-kit/` that installs AgentSH into any sandbox at creation and routes the agent's command-level activity through a coding-agent-tuned policy. Invoked via `sbx run --kit git+https://github.com/erans/agentsh.git#dir=docker/sbx-kit`. 
+ +**Architecture:** A `schemaVersion: "1"` mixin kit (`spec.yaml` + `files/` tree) runs a one-shot `install` that curls a new `install.sh` from the latest GitHub release; `initFiles` injects PATH precedence files; the `startup` command runs a new `agentsh-sbx-bootstrap` binary that merges the baked coding-agent policy template with any user-supplied override into `/etc/agentsh/policies/default.yaml`, spawns `agentsh server`, then probes the shim enforcement tier and writes `/run/agentsh/tier`. v1 ships the shim tier only; LD_PRELOAD and ptrace tiers are parked behind forward-compatible tier labels. + +**Tech Stack:** Go (existing AgentSH stack), gopkg.in/yaml.v3, cobra, GoReleaser/nfpm for packaging, GitHub Actions for the release pipeline, Bash for the installer + smoke test, Docker Sandboxes `spec.yaml` schema v1. + +**Spec reference:** `docs/superpowers/specs/2026-05-11-docker-sandboxes-mixin-kit-design.md`. + +--- + +## Task 1: Coding-agent policy template + +**Files:** +- Create: `configs/policies/coding-agent.yaml` +- Test: `internal/policy/coding_agent_template_test.go` + +This task delivers the baked-in policy from spec §8. It validates by parsing through the existing `policy.LoadFromBytes()` loader, so any field-name typo fails the test immediately. + +- [ ] **Step 1: Write the failing validation test** + +Create `internal/policy/coding_agent_template_test.go`: + +```go +package policy + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// TestCodingAgentTemplate_Loads verifies the policy that the Docker Sandboxes +// mixin kit bakes into /etc/agentsh/policies/default.yaml parses cleanly +// through the canonical loader. Any field-name typo or schema drift will be +// caught here before the kit ships. 
+func TestCodingAgentTemplate_Loads(t *testing.T) { + path := filepath.Join("..", "..", "configs", "policies", "coding-agent.yaml") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read template: %v", err) + } + p, err := LoadFromBytes(data) + if err != nil { + t.Fatalf("load template: %v", err) + } + if p.Name != "coding-agent" { + t.Errorf("Name = %q, want %q", p.Name, "coding-agent") + } + if len(p.FileRules) == 0 { + t.Error("expected file_rules") + } + if len(p.CommandRules) == 0 { + t.Error("expected command_rules") + } + if len(p.SignalRules) == 0 { + t.Error("expected signal_rules") + } +} + +// TestCodingAgentTemplate_DeniesCredentialPaths spot-checks that the rules from +// the design spec are actually present. Coverage isn't exhaustive; this just +// catches accidental rule deletion during future edits. +func TestCodingAgentTemplate_DeniesCredentialPaths(t *testing.T) { + path := filepath.Join("..", "..", "configs", "policies", "coding-agent.yaml") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read template: %v", err) + } + body := string(data) + for _, want := range []string{ + "/.ssh/", + "/.aws/", + "/.gnupg/", + "/.kube/", + "/.netrc", + "/etc/agentsh/", + "/usr/lib/agentsh/", + } { + if !strings.Contains(body, want) { + t.Errorf("expected coding-agent.yaml to reference %q", want) + } + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./internal/policy -run TestCodingAgentTemplate_ -v` +Expected: FAIL — `configs/policies/coding-agent.yaml` does not exist. + +- [ ] **Step 3: Create the coding-agent policy** + +Create `configs/policies/coding-agent.yaml`: + +```yaml +# Coding-agent policy for AgentSH inside Docker Sandboxes. +# This is the baked-in template the agentsh-sbx-bootstrap binary merges with +# any user override at /home/agent/.agentsh/policy.yaml on every sandbox start. 
+# +# Reference: /usr/share/doc/agentsh/policy-reference.md +# To extend: write rules to /home/agent/.agentsh/policy.yaml; the bootstrap +# merges them on top of this file (user wins on name collision; otherwise +# rules concatenate in declared order). + +version: 1 +name: coding-agent +description: | + Default policy for AI coding agents (Claude Code, OpenCode, Gemini CLI) + running inside Docker Sandboxes. Tuned for path/command granularity inside + the sandbox; outbound network controls are handled by the Docker Sandbox + proxy and intentionally not duplicated here. + +# ============================================================================= +# FILE RULES — evaluated in order, first match wins. +# ============================================================================= +file_rules: + + # ---- Sensitive credential paths: deny before any allow-home rule matches. + - name: deny-credential-paths + description: Block reads/writes of host credentials that may have leaked into the sandbox. + paths: + - "/home/**/.ssh/**" + - "/home/**/.aws/**" + - "/home/**/.gnupg/**" + - "/home/**/.kube/**" + - "/home/**/.docker/config.json" + - "/home/**/.netrc" + - "/home/**/.config/gcloud/**" + - "/home/**/.config/gh/**" + - "/home/**/.config/git-credentials" + - "/root/.ssh/**" + - "/root/.aws/**" + - "/root/.gnupg/**" + - "/root/.kube/**" + - "/root/.netrc" + operations: ["*"] + decision: deny + message: "Access to credential path {{.Path}} is denied by the coding-agent policy." + + # ---- AgentSH self-protection: agent cannot edit its own policy/logs/binaries. + - name: deny-self-write + description: Prevent the agent from tampering with AgentSH state. + paths: + - "/etc/agentsh/**" + - "/usr/lib/agentsh/**" + - "/usr/share/agentsh/**" + - "/run/agentsh/**" + - "/var/lib/agentsh/**" + - "/var/log/agentsh/**" + operations: [write, create, mkdir, chmod, rename, delete, rmdir] + decision: deny + message: "Write to AgentSH-controlled path {{.Path}} is denied." 
+ + # ---- Workspace: full read/write; deletes are soft so rm -rf is recoverable. + - name: allow-workspace-read + paths: ["/workspace", "/workspace/**"] + operations: [read, open, stat, list, readlink] + decision: allow + + - name: allow-workspace-write + paths: ["/workspace", "/workspace/**"] + operations: [write, create, mkdir, chmod, rename] + decision: allow + + - name: soft-delete-workspace + description: Soft-delete workspace files (recoverable via /var/lib/agentsh/trash). + paths: ["/workspace", "/workspace/**"] + operations: [delete, rmdir] + decision: soft_delete + message: "File quarantined (recoverable): {{.Path}}" + + # ---- Home: read/write everywhere except the credential paths denied above. + - name: allow-home + paths: ["/home/**", "/root/**"] + operations: ["*"] + decision: allow + + # ---- Package manager caches: full allow (routine for coding work). + - name: allow-package-caches + paths: + - "/home/**/.npm/**" + - "/home/**/.cache/pip/**" + - "/home/**/.cargo/**" + - "/home/**/.cache/go-build/**" + - "/home/**/.rustup/**" + - "/home/**/.gradle/caches/**" + - "/home/**/.m2/**" + - "/root/.npm/**" + - "/root/.cache/pip/**" + - "/root/.cargo/**" + operations: ["*"] + decision: allow + + # ---- Tmp: full access. + - name: allow-tmp + paths: ["/tmp/**", "/var/tmp/**"] + operations: ["*"] + decision: allow + + # ---- System paths: read-only allow. 
+ - name: allow-system-read + paths: + - "/usr/**" + - "/lib/**" + - "/lib64/**" + - "/bin/**" + - "/sbin/**" + - "/opt/**" + operations: [read, open, stat, list, readlink] + decision: allow + + - name: allow-etc-read-safe + paths: + - "/etc/hosts" + - "/etc/resolv.conf" + - "/etc/ssl/**" + - "/etc/ca-certificates/**" + - "/etc/localtime" + - "/etc/timezone" + - "/etc/mime.types" + - "/etc/protocols" + - "/etc/services" + - "/etc/environment" + - "/etc/environment.d/**" + - "/etc/profile.d/**" + operations: [read, open, stat] + decision: allow + +# ============================================================================= +# COMMAND RULES +# ============================================================================= +command_rules: + + - name: deny-privilege-escalation + description: The Docker Sandbox already pins the agent to a fixed user; escalation is suspicious. + commands: [sudo, su, doas] + decision: deny + message: "Privilege escalation via {{.Command}} is denied inside a Docker Sandbox." + + - name: audit-curl-pipe-to-shell + description: Audit curl/wget piped to sh/bash. v1.1 will replace this with redirect to agentsh-fetch. + commands: [curl, wget] + args_patterns: + - ".*\\|\\s*(sh|bash|zsh).*" + decision: audit + message: "curl|sh pattern detected: {{.Command}} {{.Args}}" + + - name: approve-recursive-chmod + description: Require approval for chmod -R on / or /home, or chmod 777. + commands: [chmod] + args_patterns: + - "^-R\\s+/$" + - "^-R\\s+/home.*" + - ".*777.*" + decision: approve + message: "Recursive or world-writable chmod requested: chmod {{.Args}}" + timeout: 5m + + - name: allow-package-installers + description: Routine for coding agents; allow with audit. 
+ commands: [pip, pip3, npm, yarn, pnpm, cargo, apt, apt-get, gem, bundle] + decision: allow + +# ============================================================================= +# SIGNAL RULES +# ============================================================================= +signal_rules: + + - name: deny-signal-pid1 + description: The agent must not signal PID 1. + signals: ["@fatal", "@job"] + target: + type: pid_range + min: 1 + max: 1 + decision: deny + message: "Signaling PID 1 is denied." + + - name: deny-signal-agentsh + description: The agent must not signal AgentSH processes. + signals: ["@fatal"] + target: + type: external + pattern: "agentsh*" + decision: deny + message: "Signaling AgentSH is denied." + + - name: allow-signal-own-tree + description: Allow signals within the agent's own subprocess tree. + signals: ["@fatal", "@job"] + target: + type: children + decision: allow + +# ============================================================================= +# AUDIT +# ============================================================================= +audit: + log_allowed: false + log_denied: true + log_approved: true + retention_days: 7 +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `go test ./internal/policy -run TestCodingAgentTemplate_ -v` +Expected: PASS. + +- [ ] **Step 5: Run the full policy package test suite to confirm no regressions** + +Run: `go test ./internal/policy/... -count=1` +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add configs/policies/coding-agent.yaml internal/policy/coding_agent_template_test.go +git commit -m "policy: add coding-agent template baked into the Docker Sandboxes mixin kit" +``` + +--- + +## Task 2: Policy merge helper + +**Files:** +- Create: `internal/policy/merge.go` +- Test: `internal/policy/merge_test.go` + +The bootstrap binary needs to merge the baked template with the user-supplied override fragment. 
Semantics: user wins on rule-name collisions; otherwise rules concatenate in declared order. Keep the merge contained to a new file so the existing loader is unchanged. + +- [ ] **Step 1: Write the failing test** + +Create `internal/policy/merge_test.go`: + +```go +package policy + +import ( + "testing" +) + +func TestMergeOverlay_OverlayWinsOnNameCollision(t *testing.T) { + base := &Policy{ + Version: 1, + Name: "base", + FileRules: []FileRule{ + {Name: "rule-a", Decision: "allow", Paths: []string{"/a"}}, + {Name: "rule-b", Decision: "allow", Paths: []string{"/b"}}, + }, + } + overlay := &Policy{ + Version: 1, + Name: "overlay", + FileRules: []FileRule{ + {Name: "rule-b", Decision: "deny", Paths: []string{"/b"}}, + {Name: "rule-c", Decision: "allow", Paths: []string{"/c"}}, + }, + } + + merged := MergeOverlay(base, overlay) + + if got := len(merged.FileRules); got != 3 { + t.Fatalf("len(FileRules) = %d, want 3", got) + } + if merged.FileRules[0].Name != "rule-a" { + t.Errorf("FileRules[0].Name = %q, want rule-a", merged.FileRules[0].Name) + } + if merged.FileRules[1].Name != "rule-b" || merged.FileRules[1].Decision != "deny" { + t.Errorf("FileRules[1] = %+v, want rule-b with decision=deny (overlay wins)", merged.FileRules[1]) + } + if merged.FileRules[2].Name != "rule-c" { + t.Errorf("FileRules[2].Name = %q, want rule-c", merged.FileRules[2].Name) + } +} + +func TestMergeOverlay_NilOverlayReturnsBase(t *testing.T) { + base := &Policy{Version: 1, Name: "base", FileRules: []FileRule{{Name: "x"}}} + merged := MergeOverlay(base, nil) + if merged != base { + t.Errorf("MergeOverlay(base, nil) should return base unchanged") + } +} + +func TestMergeOverlay_NilBaseReturnsOverlay(t *testing.T) { + overlay := &Policy{Version: 1, Name: "overlay", FileRules: []FileRule{{Name: "x"}}} + merged := MergeOverlay(nil, overlay) + if merged != overlay { + t.Errorf("MergeOverlay(nil, overlay) should return overlay unchanged") + } +} + +func TestMergeOverlay_PreservesAllRuleKinds(t 
*testing.T) { + base := &Policy{ + Version: 1, + Name: "base", + FileRules: []FileRule{{Name: "f1"}}, + CommandRules: []CommandRule{{Name: "c1"}}, + SignalRules: []SignalRule{{Name: "s1"}}, + NetworkRules: []NetworkRule{{Name: "n1"}}, + } + overlay := &Policy{ + Version: 1, + Name: "overlay", + FileRules: []FileRule{{Name: "f2"}}, + CommandRules: []CommandRule{{Name: "c2"}}, + SignalRules: []SignalRule{{Name: "s2"}}, + NetworkRules: []NetworkRule{{Name: "n2"}}, + } + merged := MergeOverlay(base, overlay) + if len(merged.FileRules) != 2 || len(merged.CommandRules) != 2 || + len(merged.SignalRules) != 2 || len(merged.NetworkRules) != 2 { + t.Errorf("merged rule counts wrong: %+v", merged) + } +} + +func TestMergeOverlay_KeepsBaseMetadata(t *testing.T) { + base := &Policy{Version: 1, Name: "base", Description: "from base"} + overlay := &Policy{Version: 1, Name: "overlay"} + merged := MergeOverlay(base, overlay) + if merged.Name != "base" { + t.Errorf("merged.Name = %q, want %q (base metadata preserved)", merged.Name, "base") + } + if merged.Description != "from base" { + t.Errorf("merged.Description = %q, want %q", merged.Description, "from base") + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./internal/policy -run TestMergeOverlay -v` +Expected: FAIL — `MergeOverlay` is undefined. + +- [ ] **Step 3: Implement the merge helper** + +Create `internal/policy/merge.go`: + +```go +package policy + +// MergeOverlay returns a new Policy formed by overlaying `overlay` rules on +// top of `base`. Rules with matching names in overlay replace base entries +// in-place; other overlay rules are appended in declared order. Base metadata +// (Version, Name, Description, ResourceLimits, EnvPolicy, Audit) is +// preserved from base; overlay metadata is ignored. +// +// If either argument is nil, the other is returned unchanged. This lets +// callers handle "no user override" without a nil check at the call site. 
+// +// Used by cmd/agentsh-sbx-bootstrap to combine the baked coding-agent +// template with /home/agent/.agentsh/policy.yaml at sandbox startup. +func MergeOverlay(base, overlay *Policy) *Policy { + if base == nil { + return overlay + } + if overlay == nil { + return base + } + + out := *base + out.FileRules = mergeFileRules(base.FileRules, overlay.FileRules) + out.NetworkRules = mergeNetworkRules(base.NetworkRules, overlay.NetworkRules) + out.CommandRules = mergeCommandRules(base.CommandRules, overlay.CommandRules) + out.UnixRules = mergeUnixRules(base.UnixRules, overlay.UnixRules) + out.SignalRules = mergeSignalRules(base.SignalRules, overlay.SignalRules) + return &out +} + +func mergeFileRules(base, overlay []FileRule) []FileRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]FileRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeNetworkRules(base, overlay []NetworkRule) []NetworkRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]NetworkRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeCommandRules(base, overlay []CommandRule) []CommandRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]CommandRule(nil), base...) 
+ for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeUnixRules(base, overlay []UnixSocketRule) []UnixSocketRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]UnixSocketRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeSignalRules(base, overlay []SignalRule) []SignalRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]SignalRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `go test ./internal/policy -run TestMergeOverlay -v` +Expected: PASS — all five test functions. + +- [ ] **Step 5: Run the full policy package** + +Run: `go test ./internal/policy/... -count=1` +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add internal/policy/merge.go internal/policy/merge_test.go +git commit -m "policy: add MergeOverlay helper for sbx bootstrap policy stacking" +``` + +--- + +## Task 3: Bootstrap binary — policy merge + write + +**Files:** +- Create: `cmd/agentsh-sbx-bootstrap/main.go` +- Create: `cmd/agentsh-sbx-bootstrap/policy.go` +- Test: `cmd/agentsh-sbx-bootstrap/policy_test.go` + +The bootstrap binary is the brains of the kit's `startup` phase. This task lands only the policy-merge step in isolation so we can TDD it without conflating it with daemon launch and probing (later tasks). 
+ +The merge step's job: read `/usr/share/agentsh/coding-agent.template.yaml`, read `/home/agent/.agentsh/policy.yaml` if present and parseable, merge via `policy.MergeOverlay`, write the result atomically to `/etc/agentsh/policies/default.yaml`. If the overlay is missing, unreadable, or unparseable, fall back to writing just the bare template; a template that cannot be read or parsed is a hard error. Because the write goes through a temp file and `rename`, the output file is never left half-written. + +- [ ] **Step 1: Write the failing test** + +Create `cmd/agentsh-sbx-bootstrap/policy_test.go`: + +```go +package main + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +const baseTemplate = ` +version: 1 +name: coding-agent +file_rules: + - name: allow-tmp + paths: ["/tmp/**"] + operations: ["*"] + decision: allow +` + +func TestMergeAndWritePolicy_NoOverlay(t *testing.T) { + dir := t.TempDir() + tmpl := filepath.Join(dir, "tmpl.yaml") + overlay := filepath.Join(dir, "overlay.yaml") + out := filepath.Join(dir, "out.yaml") + + if err := os.WriteFile(tmpl, []byte(baseTemplate), 0644); err != nil { + t.Fatal(err) + } + if err := mergeAndWritePolicy(tmpl, overlay, out); err != nil { + t.Fatalf("mergeAndWritePolicy: %v", err) + } + got, err := os.ReadFile(out) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(got), "allow-tmp") { + t.Errorf("expected output to contain base rules; got: %s", got) + } +} + +func TestMergeAndWritePolicy_WithOverlay(t *testing.T) { + dir := t.TempDir() + tmpl := filepath.Join(dir, "tmpl.yaml") + overlay := filepath.Join(dir, "overlay.yaml") + out := filepath.Join(dir, "out.yaml") + + if err := os.WriteFile(tmpl, []byte(baseTemplate), 0644); err != nil { + t.Fatal(err) + } + overlayBody := ` +version: 1 +name: user-overlay +file_rules: + - name: allow-extra + paths: ["/data/**"] + operations: ["*"] + decision: allow +` + if err := os.WriteFile(overlay, []byte(overlayBody), 0644); err != nil { + t.Fatal(err) + } + if err := mergeAndWritePolicy(tmpl, overlay, out); err != nil { + t.Fatalf("mergeAndWritePolicy: %v", err) + } + got, err := os.ReadFile(out) + if err != nil {
+ t.Fatal(err) + } + body := string(got) + if !strings.Contains(body, "allow-tmp") { + t.Error("expected base rule allow-tmp in merged output") + } + if !strings.Contains(body, "allow-extra") { + t.Error("expected overlay rule allow-extra in merged output") + } +} + +func TestMergeAndWritePolicy_BadOverlayFallsBackToTemplate(t *testing.T) { + dir := t.TempDir() + tmpl := filepath.Join(dir, "tmpl.yaml") + overlay := filepath.Join(dir, "overlay.yaml") + out := filepath.Join(dir, "out.yaml") + + if err := os.WriteFile(tmpl, []byte(baseTemplate), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(overlay, []byte("not: [valid: yaml"), 0644); err != nil { + t.Fatal(err) + } + if err := mergeAndWritePolicy(tmpl, overlay, out); err != nil { + t.Fatalf("mergeAndWritePolicy should not error on bad overlay: %v", err) + } + got, err := os.ReadFile(out) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(string(got), "allow-tmp") { + t.Error("expected fallback to template-only on bad overlay") + } +} + +func TestMergeAndWritePolicy_MissingTemplateErrors(t *testing.T) { + dir := t.TempDir() + tmpl := filepath.Join(dir, "nonexistent.yaml") + overlay := filepath.Join(dir, "overlay.yaml") + out := filepath.Join(dir, "out.yaml") + + err := mergeAndWritePolicy(tmpl, overlay, out) + if err == nil { + t.Fatal("expected error when template is missing") + } +} + +func TestMergeAndWritePolicy_AtomicWrite(t *testing.T) { + // If the destination already exists with content X, and the merge succeeds, + // the file should contain the new content (i.e. rename, not append). 
+ dir := t.TempDir() + tmpl := filepath.Join(dir, "tmpl.yaml") + out := filepath.Join(dir, "out.yaml") + + if err := os.WriteFile(tmpl, []byte(baseTemplate), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(out, []byte("stale: content\n"), 0644); err != nil { + t.Fatal(err) + } + if err := mergeAndWritePolicy(tmpl, "", out); err != nil { + t.Fatal(err) + } + got, err := os.ReadFile(out) + if err != nil { + t.Fatal(err) + } + if strings.Contains(string(got), "stale") { + t.Error("expected stale content to be replaced") + } +} +``` + +- [ ] **Step 2: Run the test to verify it fails** + +Run: `go test ./cmd/agentsh-sbx-bootstrap/... -v` +Expected: FAIL — package does not exist. + +- [ ] **Step 3: Create the `main.go` skeleton** + +Create `cmd/agentsh-sbx-bootstrap/main.go`: + +```go +// agentsh-sbx-bootstrap is the startup entrypoint installed into Docker +// Sandboxes by the AgentSH mixin kit. It merges the baked coding-agent +// policy with any user override, spawns the agentsh server, then probes +// the active enforcement tier and writes /run/agentsh/tier so the agent's +// SKILL.md can read it. +package main + +import ( + "flag" + "fmt" + "os" +) + +const ( + defaultTemplatePath = "/usr/share/agentsh/coding-agent.template.yaml" + defaultOverlayPath = "/home/agent/.agentsh/policy.yaml" + defaultPolicyPath = "/etc/agentsh/policies/default.yaml" + defaultTierPath = "/run/agentsh/tier" +) + +func main() { + var ( + tmpl = flag.String("template", defaultTemplatePath, "Baked-in policy template path") + overlay = flag.String("overlay", defaultOverlayPath, "User override fragment path (optional)") + policy = flag.String("policy", defaultPolicyPath, "Output merged policy path") + ) + flag.Parse() + + if err := mergeAndWritePolicy(*tmpl, *overlay, *policy); err != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: policy merge failed: %v\n", err) + os.Exit(1) + } + // Daemon spawn + tier probe land in Task 4 and Task 5. 
+} +``` + +- [ ] **Step 4: Implement `mergeAndWritePolicy`** + +Create `cmd/agentsh-sbx-bootstrap/policy.go`: + +```go +package main + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/agentsh/agentsh/internal/policy" + "gopkg.in/yaml.v3" +) + +// mergeAndWritePolicy reads the baked template at `tmpl`, reads the optional +// user override at `overlay` (any read or parse failure is logged to stderr +// and treated as "no overlay"), merges them via policy.MergeOverlay, and +// writes the result atomically to `out` via a temp file + rename. +// +// Returns an error if the template cannot be read or parsed, or if the merged +// policy cannot be marshalled or written to `out`. A missing/broken overlay is +// non-fatal: the template alone is a safe fallback and the bootstrap must fail-open. +func mergeAndWritePolicy(tmpl, overlay, out string) error { + tmplBytes, err := os.ReadFile(tmpl) + if err != nil { + return fmt.Errorf("read template: %w", err) + } + base, err := policy.LoadFromBytes(tmplBytes) + if err != nil { + return fmt.Errorf("parse template: %w", err) + } + + var ov *policy.Policy + if overlay != "" { + ovBytes, ovErr := os.ReadFile(overlay) + switch { + case os.IsNotExist(ovErr): + // No override file: fine. Bare template wins.
+ case ovErr != nil: + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: read overlay %q: %v (falling back to template only)\n", overlay, ovErr) + default: + parsed, pErr := policy.LoadFromBytes(ovBytes) + if pErr != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: parse overlay %q: %v (falling back to template only)\n", overlay, pErr) + } else { + ov = parsed + } + } + } + + merged := policy.MergeOverlay(base, ov) + + mergedYAML, err := yaml.Marshal(merged) + if err != nil { + return fmt.Errorf("marshal merged policy: %w", err) + } + + if err := os.MkdirAll(filepath.Dir(out), 0o755); err != nil { + return fmt.Errorf("mkdir output dir: %w", err) + } + tmp := out + ".tmp" + if err := os.WriteFile(tmp, mergedYAML, 0o644); err != nil { + return fmt.Errorf("write tmp: %w", err) + } + if err := os.Rename(tmp, out); err != nil { + _ = os.Remove(tmp) + return fmt.Errorf("rename: %w", err) + } + return nil +} +``` + +- [ ] **Step 5: Run test to verify it passes** + +Run: `go test ./cmd/agentsh-sbx-bootstrap/... -v` +Expected: PASS — all five test functions. + +- [ ] **Step 6: Build the binary** + +Run: `go build ./cmd/agentsh-sbx-bootstrap` +Expected: success, binary created in cwd. + +- [ ] **Step 7: Commit** + +```bash +git add cmd/agentsh-sbx-bootstrap/main.go cmd/agentsh-sbx-bootstrap/policy.go cmd/agentsh-sbx-bootstrap/policy_test.go +git commit -m "bootstrap: cmd/agentsh-sbx-bootstrap with policy merge step" +``` + +--- + +## Task 4: Bootstrap binary — daemon spawn + socket wait + +**Files:** +- Create: `cmd/agentsh-sbx-bootstrap/daemon.go` +- Test: `cmd/agentsh-sbx-bootstrap/daemon_test.go` +- Modify: `cmd/agentsh-sbx-bootstrap/main.go` + +This task adds the daemon-spawn step to the bootstrap. It fork-execs `agentsh server --config /etc/agentsh/config.yaml` in the background and waits up to 2s for the daemon's Unix socket to appear. 
On timeout, the bootstrap logs to `/var/log/agentsh/bootstrap.log` and continues — the tier probe (next task) will record `tier=none` if the socket is absent. + +The daemon spawn is tested with a fake `agentsh` binary in the test's PATH that just touches the socket path. That keeps the test hermetic and avoids depending on the real `agentsh server` startup time. + +- [ ] **Step 1: Write the failing test** + +Create `cmd/agentsh-sbx-bootstrap/daemon_test.go`: + +```go +package main + +import ( + "os" + "path/filepath" + "runtime" + "testing" + "time" +) + +func TestSpawnDaemonAndWait_SocketAppears(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("unix sockets only") + } + dir := t.TempDir() + sock := filepath.Join(dir, "agentsh.sock") + + // Fake "daemon": a shell script that writes the socket file after a small + // delay. The bootstrap should observe it within the 2s window. + fakeBin := filepath.Join(dir, "fake-agentsh") + script := "#!/bin/sh\n(sleep 0.1; touch " + sock + ") &\nexec sleep 5\n" + if err := os.WriteFile(fakeBin, []byte(script), 0o755); err != nil { + t.Fatal(err) + } + + logPath := filepath.Join(dir, "bootstrap.log") + cmd, err := spawnDaemon(fakeBin, []string{"server"}, logPath) + if err != nil { + t.Fatalf("spawnDaemon: %v", err) + } + t.Cleanup(func() { _ = cmd.Process.Kill() }) + + if err := waitForSocket(sock, 2*time.Second); err != nil { + t.Fatalf("waitForSocket: %v", err) + } +} + +func TestWaitForSocket_TimesOut(t *testing.T) { + dir := t.TempDir() + sock := filepath.Join(dir, "nope.sock") + start := time.Now() + err := waitForSocket(sock, 200*time.Millisecond) + if err == nil { + t.Fatal("expected timeout error") + } + if elapsed := time.Since(start); elapsed > 1*time.Second { + t.Errorf("waitForSocket overshot deadline: %v", elapsed) + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./cmd/agentsh-sbx-bootstrap/... 
-run TestSpawnDaemonAndWait -v` +Expected: FAIL — `spawnDaemon` and `waitForSocket` are undefined. + +- [ ] **Step 3: Implement daemon spawn + socket wait** + +Create `cmd/agentsh-sbx-bootstrap/daemon.go`: + +```go +package main + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "time" +) + +// spawnDaemon fork-execs `bin args...` with stdout/stderr appended to logPath. +// The child is detached; the returned *exec.Cmd lets the caller signal it if +// needed (in normal flow the bootstrap exits after probing and the daemon +// keeps running, reparented to PID 1). +func spawnDaemon(bin string, args []string, logPath string) (*exec.Cmd, error) { + if err := os.MkdirAll(filepath.Dir(logPath), 0o755); err != nil { + return nil, fmt.Errorf("mkdir log dir: %w", err) + } + logF, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644) + if err != nil { + return nil, fmt.Errorf("open log: %w", err) + } + cmd := exec.Command(bin, args...) + cmd.Stdout = logF + cmd.Stderr = logF + cmd.Env = os.Environ() + if err := cmd.Start(); err != nil { + logF.Close() + return nil, fmt.Errorf("start %s: %w", bin, err) + } + // Release the parent's reference to the log file FD once exec(2) has dup'd + // stdio. The child keeps its own dup'd FD. + go func() { _ = logF.Close() }() + return cmd, nil +} + +// waitForSocket polls for a filesystem entry at sockPath, returning nil as +// soon as it exists. Returns an error if the deadline elapses first. +// +// We check existence rather than `Dial` because the daemon may use a +// different socket type (gRPC vs HTTP) and a successful Dial isn't required +// to confirm "the daemon has started writing its socket" — only that the +// file exists. 
+func waitForSocket(sockPath string, deadline time.Duration) error { + end := time.Now().Add(deadline) + for time.Now().Before(end) { + if _, err := os.Stat(sockPath); err == nil { + return nil + } + time.Sleep(50 * time.Millisecond) + } + return fmt.Errorf("socket %q did not appear within %s", sockPath, deadline) +} +``` + +- [ ] **Step 4: Wire it into `main.go`** + +Replace `cmd/agentsh-sbx-bootstrap/main.go` with: + +```go +// agentsh-sbx-bootstrap is the startup entrypoint installed into Docker +// Sandboxes by the AgentSH mixin kit. It merges the baked coding-agent +// policy with any user override, spawns the agentsh server, then probes +// the active enforcement tier and writes /run/agentsh/tier so the agent's +// SKILL.md can read it. +package main + +import ( + "flag" + "fmt" + "os" + "time" +) + +const ( + defaultTemplatePath = "/usr/share/agentsh/coding-agent.template.yaml" + defaultOverlayPath = "/home/agent/.agentsh/policy.yaml" + defaultPolicyPath = "/etc/agentsh/policies/default.yaml" + defaultTierPath = "/run/agentsh/tier" + defaultBootstrapLog = "/var/log/agentsh/bootstrap.log" + defaultDaemonLog = "/var/log/agentsh/daemon.log" + defaultAgentshBin = "/usr/bin/agentsh" + defaultServerConfig = "/etc/agentsh/config.yaml" + defaultDaemonSocket = "/run/agentsh/agentsh.sock" + defaultSocketTimeout = 2 * time.Second +) + +func main() { + var ( + tmpl = flag.String("template", defaultTemplatePath, "Baked policy template path") + overlay = flag.String("overlay", defaultOverlayPath, "User override fragment path") + policy = flag.String("policy", defaultPolicyPath, "Output merged policy path") + agentshBin = flag.String("agentsh", defaultAgentshBin, "Path to the agentsh binary") + srvConfig = flag.String("server-config", defaultServerConfig, "Path to the agentsh server config") + sock = flag.String("socket", defaultDaemonSocket, "Daemon socket path to poll for readiness") + ) + flag.Parse() + + if err := mergeAndWritePolicy(*tmpl, *overlay, *policy); err != 
nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: policy merge failed: %v\n", err) + os.Exit(1) + } + + if _, err := spawnDaemon(*agentshBin, []string{"server", "--config", *srvConfig}, defaultDaemonLog); err != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: spawn daemon: %v\n", err) + os.Exit(1) + } + + if err := waitForSocket(*sock, defaultSocketTimeout); err != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: %v (continuing with degraded tier)\n", err) + // Don't exit — tier probe will record tier=none. + } + + // Tier probe lands in Task 5. +} +``` + +- [ ] **Step 5: Run tests** + +Run: `go test ./cmd/agentsh-sbx-bootstrap/... -v` +Expected: PASS — all merge tests plus the two new daemon tests. + +- [ ] **Step 6: Build to verify the package still compiles** + +Run: `go build ./cmd/agentsh-sbx-bootstrap` +Expected: success. + +- [ ] **Step 7: Commit** + +```bash +git add cmd/agentsh-sbx-bootstrap/daemon.go cmd/agentsh-sbx-bootstrap/daemon_test.go cmd/agentsh-sbx-bootstrap/main.go +git commit -m "bootstrap: spawn agentsh server and wait for socket" +``` + +--- + +## Task 5: Bootstrap binary — tier-1 (shim) probe and tier file + +**Files:** +- Create: `cmd/agentsh-sbx-bootstrap/tier.go` +- Test: `cmd/agentsh-sbx-bootstrap/tier_test.go` +- Modify: `cmd/agentsh-sbx-bootstrap/main.go` + +This task adds the shim-tier probe. The probe spawns `/bin/sh -c 'command -v curl'` and checks the resolved path starts with the shim directory. The active tier (`shim` or `none`) is written to `/run/agentsh/tier`. 
+ +- [ ] **Step 1: Write the failing test** + +Create `cmd/agentsh-sbx-bootstrap/tier_test.go`: + +```go +package main + +import ( + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestProbeShimTier_DetectsShimOnPath(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("shell-based probe is POSIX only") + } + dir := t.TempDir() + shimDir := filepath.Join(dir, "shims") + if err := os.Mkdir(shimDir, 0o755); err != nil { + t.Fatal(err) + } + // Place a fake `curl` executable in the shim dir. + fakeCurl := filepath.Join(shimDir, "curl") + if err := os.WriteFile(fakeCurl, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatal(err) + } + + // Inject the shim dir at the front of PATH for the probe. + t.Setenv("PATH", shimDir+string(os.PathListSeparator)+os.Getenv("PATH")) + + ok, resolved, err := probeShimTier(shimDir) + if err != nil { + t.Fatalf("probeShimTier: %v", err) + } + if !ok { + t.Errorf("expected probe to detect shim; resolved=%q", resolved) + } + if !strings.HasPrefix(resolved, shimDir) { + t.Errorf("resolved %q should be under shim dir %q", resolved, shimDir) + } +} + +func TestProbeShimTier_RejectsRealCurl(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("shell-based probe is POSIX only") + } + // Don't put any shim on PATH. The system curl (if present) should NOT + // match the shim dir, so the probe returns false. + t.Setenv("PATH", "/usr/bin:/bin") + ok, _, err := probeShimTier("/nonexistent/shims") + if err != nil { + // "command -v" failing because curl isn't installed is fine; it + // surfaces as ok=false, err=nil. If it does error, we want to know. 
+ t.Logf("probe returned err (acceptable): %v", err) + } + if ok { + t.Errorf("expected probe to NOT detect shim when only /usr/bin/curl is reachable") + } +} + +func TestWriteTierFile(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "tier") + if err := writeTierFile(path, "shim"); err != nil { + t.Fatalf("writeTierFile: %v", err) + } + got, err := os.ReadFile(path) + if err != nil { + t.Fatal(err) + } + if string(got) != "shim\n" { + t.Errorf("tier file = %q, want %q", got, "shim\n") + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./cmd/agentsh-sbx-bootstrap/... -run "TestProbeShimTier|TestWriteTierFile" -v` +Expected: FAIL — `probeShimTier` and `writeTierFile` are undefined. + +- [ ] **Step 3: Implement the probe and tier-file writer** + +Create `cmd/agentsh-sbx-bootstrap/tier.go`: + +```go +package main + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" +) + +// probeShimTier runs `/bin/sh -c 'command -v curl'` and reports whether the +// resolved curl path lives under shimDir. Returns (ok, resolvedPath, err). +// A non-nil error means the probe couldn't be run at all (e.g. /bin/sh +// missing); a successful run with `ok=false` means curl is either absent or +// the system curl is winning over the shim. +func probeShimTier(shimDir string) (bool, string, error) { + cmd := exec.Command("/bin/sh", "-c", "command -v curl") + out, err := cmd.Output() + if err != nil { + // `command -v curl` exits 1 when curl isn't found; that's not an error + // for our purposes — it just means the shim tier didn't apply. 
+ if exitErr, ok := err.(*exec.ExitError); ok && exitErr.ExitCode() == 1 { + return false, "", nil + } + return false, "", fmt.Errorf("probe: %w", err) + } + resolved := strings.TrimSpace(string(out)) + if resolved == "" { + return false, "", nil + } + clean := filepath.Clean(shimDir) + return strings.HasPrefix(resolved, clean+string(filepath.Separator)) || resolved == clean, resolved, nil +} + +// writeTierFile writes the active tier name (e.g. "shim" or "none") followed +// by a trailing newline to path. Atomic via tmp+rename so concurrent readers +// (the SKILL.md tells the agent to `cat` this file) never see a half-written +// value. Creates parent dirs with mode 0755. +func writeTierFile(path, tier string) error { + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return fmt.Errorf("mkdir tier dir: %w", err) + } + tmp := path + ".tmp" + if err := os.WriteFile(tmp, []byte(tier+"\n"), 0o644); err != nil { + return fmt.Errorf("write tmp: %w", err) + } + if err := os.Rename(tmp, path); err != nil { + _ = os.Remove(tmp) + return fmt.Errorf("rename: %w", err) + } + return nil +} +``` + +- [ ] **Step 4: Wire into main** + +Replace the trailing `// Tier probe lands in Task 5.` comment in `cmd/agentsh-sbx-bootstrap/main.go` with the probe step: + +```go + const defaultShimDir = "/usr/lib/agentsh/shims" + shimDir := defaultShimDir + if env := os.Getenv("AGENTSH_SHIM_DIR"); env != "" { + shimDir = env + } + + tier := "none" + if ok, resolved, probeErr := probeShimTier(shimDir); probeErr != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: shim probe failed: %v\n", probeErr) + } else if ok { + tier = "shim" + fmt.Fprintf(os.Stdout, "agentsh-sbx-bootstrap: shim tier active (curl -> %s)\n", resolved) + } else { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: shim tier NOT active (PATH did not yield %s)\n", shimDir) + } + + if err := writeTierFile(defaultTierPath, tier); err != nil { + fmt.Fprintf(os.Stderr, "agentsh-sbx-bootstrap: write tier file: 
%v\n", err) + os.Exit(1) + } +``` + +(Insert this block immediately after the `waitForSocket` call so the existing structure is preserved.) + +- [ ] **Step 5: Run tests** + +Run: `go test ./cmd/agentsh-sbx-bootstrap/... -v` +Expected: PASS. + +- [ ] **Step 6: Run a quick end-to-end smoke locally** + +Run: +``` +mkdir -p /tmp/sbx-bootstrap-test/shims /tmp/sbx-bootstrap-test/run /tmp/sbx-bootstrap-test/etc +cat <<'EOF' >/tmp/sbx-bootstrap-test/shims/curl +#!/bin/sh +exit 0 +EOF +chmod +x /tmp/sbx-bootstrap-test/shims/curl +PATH=/tmp/sbx-bootstrap-test/shims:$PATH AGENTSH_SHIM_DIR=/tmp/sbx-bootstrap-test/shims \ + go run ./cmd/agentsh-sbx-bootstrap \ + --template configs/policies/coding-agent.yaml \ + --overlay /dev/null \ + --policy /tmp/sbx-bootstrap-test/etc/default.yaml \ + --agentsh /bin/true \ + --server-config /dev/null \ + --socket /tmp/sbx-bootstrap-test/run/sock 2>&1 | tee /tmp/sbx-bootstrap-test/log +``` +Expected: log line "shim tier active (curl -> /tmp/sbx-bootstrap-test/shims/curl)". `waitForSocket` will time out, but that's expected with `/bin/true` as the daemon; the bootstrap continues regardless. Note that the daemon log (`/var/log/agentsh/daemon.log`) and tier file (`/run/agentsh/tier`) paths are hard-coded constants with no flag override, so run this as a user that can create them (e.g. root inside a throwaway container); otherwise the bootstrap exits 1 (before the probe runs, if the daemon log cannot be opened). + +- [ ] **Step 7: Commit** + +```bash +git add cmd/agentsh-sbx-bootstrap/tier.go cmd/agentsh-sbx-bootstrap/tier_test.go cmd/agentsh-sbx-bootstrap/main.go +git commit -m "bootstrap: shim-tier probe + /run/agentsh/tier writer" +``` + +--- + +## Task 6: Package the new artifacts via .goreleaser.yml + +**Files:** +- Modify: `.goreleaser.yml` + +Add the new bootstrap binary build, the shim symlinks under `/usr/lib/agentsh/shims/`, and the packaged policy template at `/usr/share/agentsh/coding-agent.template.yaml`. The existing `configs/policies/*.yaml` glob already installs the new `coding-agent.yaml` to `/etc/agentsh/policies/`, so that side is automatic.
+ +- [ ] **Step 1: Add the bootstrap build target** + +In `.goreleaser.yml`, after the `shim-darwin` build (around line 130, before the archives block — find the last `- id: shim-*` block), append a new linux-only build: + +```yaml + - id: sbx-bootstrap-linux + main: ./cmd/agentsh-sbx-bootstrap + binary: agentsh-sbx-bootstrap + env: + - CGO_ENABLED=0 + goos: + - linux + goarch: + - amd64 + - arm64 + ldflags: + - -s -w -X main.version={{.Version}} +``` + +- [ ] **Step 2: Add the sbx-bootstrap build id to the linux .deb/.rpm nfpm `ids:` list** + +In the `nfpms:` block (around line 300), the linux Debian/RPM package's `ids:` list already contains the agentsh and shim builds. Add `sbx-bootstrap-linux`: + +```yaml +nfpms: + - id: agentsh + package_name: agentsh + ids: + - agentsh-linux-amd64 + - agentsh-linux-arm64 + - shim-linux + - unixwrap-linux-amd64 + - unixwrap-linux-arm64 + - stub-linux + - sbx-bootstrap-linux # NEW: bootstrap binary lands in /usr/bin +``` + +- [ ] **Step 3: Add packaged template + shim symlink directory** + +In the same `nfpms:` block's `contents:` section, after the existing `/usr/lib/agentsh/bash_startup.sh` entry, append: + +```yaml + # Coding-agent policy template for Docker Sandboxes mixin bootstrap. + # Installed read-only — the bootstrap writes the merged result to + # /etc/agentsh/policies/default.yaml on each sandbox start. + - src: configs/policies/coding-agent.yaml + dst: /usr/share/agentsh/coding-agent.template.yaml + file_info: + mode: 0644 + + # Shim directory + symlinks (Docker Sandboxes mixin support). + # /usr/lib/agentsh/shims is prepended to PATH inside sandboxes via + # /etc/profile.d/agentsh.sh (written by the mixin kit's initFiles). 
+ - dst: /usr/lib/agentsh/shims + type: dir + file_info: + mode: 0755 + - dst: /usr/lib/agentsh/shims/bash + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/sh + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/curl + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/wget + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/pip + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/pip3 + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/npm + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/node + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/git + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/python + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/python3 + src: /usr/bin/agentsh-shell-shim + type: symlink + - dst: /usr/lib/agentsh/shims/rm + src: /usr/bin/agentsh-shell-shim + type: symlink + + # Packaged policy reference (also lives in repo at docs/policy-reference.md). + - src: docs/policy-reference.md + dst: /usr/share/doc/agentsh/policy-reference.md + file_info: + mode: 0644 +``` + +(`docs/policy-reference.md` is created in Task 7. Leave the reference here so the packaging stays close to other doc entries.) + +- [ ] **Step 4: Validate goreleaser config** + +Run: `goreleaser check` +Expected: PASS, no warnings. (If goreleaser is not installed, install with `go install github.com/goreleaser/goreleaser/v2@latest`.) + +- [ ] **Step 5: Build a snapshot to confirm artifacts produce** + +Run: `goreleaser build --snapshot --clean --single-target --id sbx-bootstrap-linux` +Expected: success; binary at `dist/sbx-bootstrap-linux_linux_amd64_v1/agentsh-sbx-bootstrap`. 
+ +- [ ] **Step 6: Commit** + +```bash +git add .goreleaser.yml +git commit -m "release: package sbx-bootstrap binary, shim symlinks, policy template" +``` + +--- + +## Task 7: Packaged policy reference doc + +**Files:** +- Create: `docs/policy-reference.md` + +The SKILL.md points the agent at `/usr/share/doc/agentsh/policy-reference.md` for the full grammar. This task lands a single user-facing reference that lives in the repo and is packaged into the OS bundle by Task 6's `nfpms.contents` entry. + +This document is descriptive — no tests run against it directly. The validation gate is: SKILL.md (Task 9) references it and the smoke test (Task 9) confirms the file exists in the sandbox. + +- [ ] **Step 1: Create the reference doc** + +Create `docs/policy-reference.md`: + +```markdown +# AgentSH policy reference (Docker Sandboxes edition) + +This file ships at `/usr/share/doc/agentsh/policy-reference.md` inside any +Docker Sandbox that has the AgentSH mixin kit installed. It's the canonical +reference the agent's SKILL.md points at when you (or the agent) want to add +or change a rule. + +For the full schema documented inline with examples, see +`/etc/agentsh/policies/default.yaml` — the merged policy the daemon is +currently enforcing. + +## Inspecting the live state + +| Question | Run | +|---|---| +| What enforcement tier is active? | `cat /run/agentsh/tier` (one of `shim`, `none`) | +| What policy is being enforced right now? | `cat /etc/agentsh/policies/default.yaml` | +| What are my overrides on top of the baked policy? | `cat /home/agent/.agentsh/policy.yaml` | +| Is the daemon running? | `pgrep -af 'agentsh server'` | + +## Adding rules — `~/.agentsh/policy.yaml` + +Write a partial policy. The bootstrap merges it on top of the baked +`coding-agent` template on next sandbox start. Rules that share a `name` with +a baked rule replace it; rules with new names append after the baked set. 
+ +```yaml +version: 1 +name: my-overrides + +file_rules: + - name: allow-extra-write-area + paths: ["/data/**"] + operations: [write, create, mkdir, rename] + decision: allow + + - name: allow-workspace-write # overrides the baked rule by name + paths: ["/workspace", "/workspace/**", "/scratch/**"] + operations: [write, create, mkdir, chmod, rename] + decision: allow + +command_rules: + - name: deny-aws-cli + commands: [aws] + decision: deny + message: "aws-cli is not permitted in this sandbox" +``` + +## Rule kinds at a glance + +- `file_rules` — file open/read/write/delete/stat/list, by glob path. Decisions: `allow`, `deny`, `approve`, `audit`, `soft_delete`, `redirect`. +- `command_rules` — process exec, by command name + optional argument regex. Decisions: `allow`, `deny`, `approve`, `audit`, `redirect`. +- `signal_rules` — signal sending. Decisions: `allow`, `deny`, `audit`, `approve`, `redirect`, `absorb`. +- `network_rules` — outbound connect by domain / port / CIDR. The Docker Sandbox proxy is the primary outbound-network gate inside a sandbox; AgentSH's network rules are layered on top and apply *before* the proxy. +- `unix_socket_rules` — AF_UNIX socket connect/bind/listen. + +Each rule has `name`, `description`, the kind-specific selectors, `decision`, and an optional `message` (Go template; available variables: `.Path`, `.Command`, `.Args`, `.Decision`, `.Signal`, `.PID`). 
+ +## Where things live + +| Path | Owner | Purpose | +|---|---|---| +| `/usr/share/agentsh/coding-agent.template.yaml` | OS package, read-only | Baked-in policy the bootstrap reads | +| `/home/agent/.agentsh/policy.yaml` | You | Override fragment (optional) | +| `/etc/agentsh/policies/default.yaml` | bootstrap (regenerated each start) | What the daemon enforces | +| `/etc/agentsh/config.yaml` | OS package | Daemon server config | +| `/run/agentsh/tier` | bootstrap | Active enforcement tier | +| `/run/agentsh/agentsh.sock` | daemon | Daemon control socket | +| `/var/log/agentsh/daemon.log` | daemon | Daemon stdout+stderr | +| `/var/log/agentsh/bootstrap.log` | bootstrap | Startup banner + tier probe result | + +## Decision semantics quick reference + +- `allow` — operation proceeds. +- `audit` — operation proceeds and an audit event is emitted. +- `deny` — operation refused; the agent gets EACCES (or equivalent). +- `approve` — operation blocks until a human approves out-of-band. +- `soft_delete` — for file delete/rmdir only: the path is moved to `/var/lib/agentsh/trash/` instead of being removed. Recoverable. +- `redirect` — for `command_rules` and `connect_redirects`: the operation is rewritten to a different command or destination. + +## Reloading + +In v1, the bootstrap re-runs only at sandbox start. To pick up a new +`~/.agentsh/policy.yaml`, restart the sandbox via Docker Sandboxes. v1.1 may +add an in-place reload. +``` + +- [ ] **Step 2: Commit** + +```bash +git add docs/policy-reference.md +git commit -m "docs: policy-reference.md packaged with the kit for in-sandbox use" +``` + +--- + +## Task 8: install.sh installer script + +**Files:** +- Create: `scripts/install-agentsh.sh` +- Test: `scripts/install-agentsh_test.sh` + +The mixin kit's `install` command does `curl … install.sh | sh`. This task creates the script. It detects the package manager and installs the matching release artifact for the host's architecture. + +The script is self-contained POSIX `sh` (the kit pipes it into `sh`, so it must not use bashisms); only its test driver is Bash. 
Validation is via `shellcheck` plus a tiny driver that invokes the script in a dry-run mode. + +- [ ] **Step 1: Write a failing test** + +Create `scripts/install-agentsh_test.sh`: + +```bash +#!/usr/bin/env bash +# Smoke test for scripts/install-agentsh.sh. +# Runs the script with AGENTSH_DRY_RUN=1 and asserts it picks the right +# package manager + URL based on AGENTSH_FORCE_DETECT. + +set -euo pipefail + +here=$(cd "$(dirname "$0")" && pwd) +script="$here/install-agentsh.sh" + +# Test 1: detects dpkg +out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=dpkg AGENTSH_ARCH=amd64 "$script" 2>&1 || true) +echo "$out" | grep -q "dpkg.*agentsh_.*_linux_amd64.deb" || { + echo "FAIL: dpkg branch missing or wrong URL" + echo "----- output -----" + echo "$out" + exit 1 +} + +# Test 2: detects rpm +out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=rpm AGENTSH_ARCH=amd64 "$script" 2>&1 || true) +echo "$out" | grep -q "rpm.*agentsh-.*\.x86_64\.rpm" || { + echo "FAIL: rpm branch missing or wrong URL" + echo "----- output -----" + echo "$out" + exit 1 +} + +# Test 3: detects apk +out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=apk AGENTSH_ARCH=amd64 "$script" 2>&1 || true) +echo "$out" | grep -q "apk.*agentsh_.*_linux_amd64.apk" || { + echo "FAIL: apk branch missing or wrong URL" + echo "----- output -----" + echo "$out" + exit 1 +} + +# Test 4: unknown package manager fails fast +if AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=none "$script" 2>/dev/null; then + echo "FAIL: expected non-zero exit when no package manager detected" + exit 1 +fi + +# Test 5: arm64 selects arm64 artifact +out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=dpkg AGENTSH_ARCH=arm64 "$script" 2>&1 || true) +echo "$out" | grep -q "agentsh_.*_linux_arm64.deb" || { + echo "FAIL: arm64 URL not generated" + echo "----- output -----" + echo "$out" + exit 1 +} + +echo "OK install-agentsh.sh" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `chmod +x scripts/install-agentsh_test.sh && ./scripts/install-agentsh_test.sh` 
+Expected: FAIL — `scripts/install-agentsh.sh` does not exist. + +- [ ] **Step 3: Create the installer** + +Create `scripts/install-agentsh.sh`: + +```bash +#!/bin/sh +# install-agentsh.sh — install AgentSH into a Linux container/VM. +# +# Used by the Docker Sandboxes mixin kit; also safe to run interactively +# on any supported Linux. Detects the host's package manager and +# downloads the matching `.deb`, `.rpm`, or `.apk` from the latest +# AgentSH GitHub release. +# +# Env knobs (all optional): +# AGENTSH_VERSION Pinned release tag (default: latest) +# AGENTSH_ARCH amd64 | arm64 (default: detected via uname -m) +# AGENTSH_DRY_RUN 1 = print actions without downloading/installing +# AGENTSH_FORCE_DETECT dpkg | rpm | apk | none (test hook) +# +# Exit codes: +# 0 success +# 1 detection failure (no supported package manager) +# 2 download failure +# 3 install failure + +set -eu + +base_url() { + if [ -n "${AGENTSH_VERSION:-}" ]; then + printf '%s' "https://github.com/erans/agentsh/releases/download/${AGENTSH_VERSION}" + else + printf '%s' "https://github.com/erans/agentsh/releases/latest/download" + fi +} + +detect_arch() { + if [ -n "${AGENTSH_ARCH:-}" ]; then + printf '%s' "$AGENTSH_ARCH" + return + fi + case "$(uname -m)" in + x86_64|amd64) printf 'amd64' ;; + aarch64|arm64) printf 'arm64' ;; + *) printf 'unsupported' ;; + esac +} + +detect_pm() { + if [ -n "${AGENTSH_FORCE_DETECT:-}" ]; then + printf '%s' "$AGENTSH_FORCE_DETECT" + return + fi + if command -v dpkg >/dev/null 2>&1; then printf 'dpkg'; return; fi + if command -v rpm >/dev/null 2>&1; then printf 'rpm'; return; fi + if command -v apk >/dev/null 2>&1; then printf 'apk'; return; fi + printf 'none' +} + +run() { + if [ "${AGENTSH_DRY_RUN:-}" = "1" ]; then + echo "DRY: $*" + else + "$@" + fi +} + +main() { + arch=$(detect_arch) + if [ "$arch" = "unsupported" ]; then + echo "install-agentsh: unsupported architecture $(uname -m)" >&2 + exit 1 + fi + + pm=$(detect_pm) + case "$pm" in + dpkg) + 
url="$(base_url)/agentsh_VERSION_linux_${arch}.deb" + tmp="/tmp/agentsh.deb" + echo "install-agentsh: using dpkg ($url)" + run sh -c "curl -fsSL '$url' -o '$tmp'" || exit 2 + run dpkg -i "$tmp" || exit 3 + ;; + rpm) + rpmarch=$([ "$arch" = "amd64" ] && echo x86_64 || echo aarch64) + url="$(base_url)/agentsh-VERSION.${rpmarch}.rpm" + tmp="/tmp/agentsh.rpm" + echo "install-agentsh: using rpm ($url)" + run sh -c "curl -fsSL '$url' -o '$tmp'" || exit 2 + run rpm -Uvh --replacepkgs "$tmp" || exit 3 + ;; + apk) + url="$(base_url)/agentsh_VERSION_linux_${arch}.apk" + tmp="/tmp/agentsh.apk" + echo "install-agentsh: using apk ($url)" + run sh -c "curl -fsSL '$url' -o '$tmp'" || exit 2 + run apk add --allow-untrusted "$tmp" || exit 3 + ;; + none) + echo "install-agentsh: no supported package manager (dpkg/rpm/apk) found" >&2 + exit 1 + ;; + *) + echo "install-agentsh: unknown package manager $pm" >&2 + exit 1 + ;; + esac + + echo "install-agentsh: done" +} + +main "$@" +``` + +- [ ] **Step 4: Run the test** + +Run: `chmod +x scripts/install-agentsh.sh && ./scripts/install-agentsh_test.sh` +Expected: PASS, prints `OK install-agentsh.sh`. + +- [ ] **Step 5: Run shellcheck on both scripts** + +Run: `shellcheck scripts/install-agentsh.sh scripts/install-agentsh_test.sh` +Expected: no errors. (If shellcheck isn't installed: `sudo apt-get install -y shellcheck` or skip with a note.) 
+ +- [ ] **Step 6: Commit** + +```bash +git add scripts/install-agentsh.sh scripts/install-agentsh_test.sh +git commit -m "scripts: install-agentsh.sh for the Docker Sandboxes mixin kit" +``` + +--- + +## Task 9: The mixin kit directory itself + +**Files:** +- Create: `docker/sbx-kit/spec.yaml` +- Create: `docker/sbx-kit/README.md` +- Create: `docker/sbx-kit/files/workspace/.claude/skills/agentsh/SKILL.md` +- Create: `docker/sbx-kit/files/home/agent/.agentsh/policy.yaml` +- Create: `docker/sbx-kit/tests/coding-agent-smoke.sh` +- Test: `docker/sbx-kit/spec_test.go` + +The kit tree gets a Go test that parses `spec.yaml` and checks the structural invariants we care about (manifest fields, shape of `commands.install` / `initFiles` / `startup`). The test lives under `docker/sbx-kit/` and runs as part of `go test ./...`. + +- [ ] **Step 1: Write the failing kit-spec test** + +Create `docker/sbx-kit/spec_test.go`: + +```go +// Package sbxkit hosts a structural test for spec.yaml so a fresh engineer +// can't break the manifest format without CI catching it. 
+package sbxkit + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "gopkg.in/yaml.v3" +) + +type kitSpec struct { + SchemaVersion string `yaml:"schemaVersion"` + Kind string `yaml:"kind"` + Name string `yaml:"name"` + DisplayName string `yaml:"displayName"` + Description string `yaml:"description"` + Commands kitCmds `yaml:"commands"` +} + +type kitCmds struct { + Install []kitInstall `yaml:"install"` + InitFiles []kitInitFile `yaml:"initFiles"` + Startup []kitStartup `yaml:"startup"` +} + +type kitInstall struct { + Command string `yaml:"command"` + User string `yaml:"user"` + Description string `yaml:"description"` +} + +type kitInitFile struct { + Path string `yaml:"path"` + Content string `yaml:"content"` + Mode string `yaml:"mode"` +} + +type kitStartup struct { + Command []string `yaml:"command"` + User string `yaml:"user"` + Background bool `yaml:"background"` + Description string `yaml:"description"` +} + +func loadSpec(t *testing.T) *kitSpec { + t.Helper() + path := filepath.Join("spec.yaml") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read spec.yaml: %v", err) + } + var s kitSpec + if err := yaml.Unmarshal(data, &s); err != nil { + t.Fatalf("parse spec.yaml: %v", err) + } + return &s +} + +func TestSpecYAML_TopLevel(t *testing.T) { + s := loadSpec(t) + if s.SchemaVersion != "1" { + t.Errorf("schemaVersion = %q, want %q", s.SchemaVersion, "1") + } + if s.Kind != "mixin" { + t.Errorf("kind = %q, want %q", s.Kind, "mixin") + } + if s.Name != "agentsh" { + t.Errorf("name = %q, want %q", s.Name, "agentsh") + } +} + +func TestSpecYAML_InstallReferencesInstallScript(t *testing.T) { + s := loadSpec(t) + if len(s.Commands.Install) != 1 { + t.Fatalf("expected exactly one install command, got %d", len(s.Commands.Install)) + } + cmd := s.Commands.Install[0].Command + if !strings.Contains(cmd, "install.sh") { + t.Errorf("install command does not curl install.sh: %q", cmd) + } + if s.Commands.Install[0].User != "0" { + t.Errorf("install 
user = %q, want %q (root)", s.Commands.Install[0].User, "0") + } +} + +func TestSpecYAML_InitFilesSetShimPath(t *testing.T) { + s := loadSpec(t) + var foundProfile, foundEnv bool + for _, f := range s.Commands.InitFiles { + if f.Path == "/etc/profile.d/agentsh.sh" { + foundProfile = true + if !strings.Contains(f.Content, "/usr/lib/agentsh/shims") { + t.Errorf("profile.d entry does not export shim PATH: %q", f.Content) + } + } + if f.Path == "/etc/environment.d/10-agentsh.conf" { + foundEnv = true + if !strings.Contains(f.Content, "/usr/lib/agentsh/shims") { + t.Errorf("environment.d entry does not include shim PATH: %q", f.Content) + } + } + } + if !foundProfile { + t.Error("initFiles missing /etc/profile.d/agentsh.sh entry") + } + if !foundEnv { + t.Error("initFiles missing /etc/environment.d/10-agentsh.conf entry") + } +} + +func TestSpecYAML_StartupInvokesBootstrap(t *testing.T) { + s := loadSpec(t) + if len(s.Commands.Startup) != 1 { + t.Fatalf("expected exactly one startup command, got %d", len(s.Commands.Startup)) + } + cmd := s.Commands.Startup[0] + if len(cmd.Command) == 0 || cmd.Command[0] != "/usr/bin/agentsh-sbx-bootstrap" { + t.Errorf("startup command = %v, want first element /usr/bin/agentsh-sbx-bootstrap", cmd.Command) + } + if !cmd.Background { + t.Error("startup command must be background:true") + } +} + +func TestKitFiles_SkillExists(t *testing.T) { + if _, err := os.Stat(filepath.Join("files", "workspace", ".claude", "skills", "agentsh", "SKILL.md")); err != nil { + t.Errorf("SKILL.md missing: %v", err) + } +} + +func TestKitFiles_OverrideStubExists(t *testing.T) { + if _, err := os.Stat(filepath.Join("files", "home", "agent", ".agentsh", "policy.yaml")); err != nil { + t.Errorf("override stub missing: %v", err) + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `go test ./docker/sbx-kit/... -v` +Expected: FAIL — spec.yaml missing. 
+ +- [ ] **Step 3: Create the spec.yaml** + +Create `docker/sbx-kit/spec.yaml`: + +```yaml +# AgentSH mixin kit for Docker Sandboxes. +# See docs/superpowers/specs/2026-05-11-docker-sandboxes-mixin-kit-design.md +# Invoke: sbx run --kit git+https://github.com/erans/agentsh.git#dir=docker/sbx-kit + +schemaVersion: "1" +kind: mixin +name: agentsh +displayName: AgentSH +description: Policy-enforced execution gateway for AI coding agents + +commands: + install: + - command: "/bin/sh -c 'curl -fsSL https://github.com/erans/agentsh/releases/latest/download/install.sh | sh'" + user: "0" + description: Install agentsh from the latest GitHub release + + initFiles: + - path: /etc/profile.d/agentsh.sh + content: 'export PATH=/usr/lib/agentsh/shims:$PATH' + mode: "0644" + + - path: /etc/environment.d/10-agentsh.conf + content: 'PATH=/usr/lib/agentsh/shims:/usr/local/bin:/usr/bin:/bin' + mode: "0644" + + startup: + - command: ["/usr/bin/agentsh-sbx-bootstrap"] + user: "0" + background: true + description: Merge policy, start agentsh server, probe enforcement tier +``` + +- [ ] **Step 4: Create the override stub** + +Create `docker/sbx-kit/files/home/agent/.agentsh/policy.yaml`: + +```yaml +# AgentSH user-override fragment. +# +# Anything you write here merges on top of the baked coding-agent policy at +# /usr/share/agentsh/coding-agent.template.yaml on the next sandbox start. +# Rules that share a `name` with a baked rule replace it; rules with new +# names append after the baked set. 
+# +# Reference: /usr/share/doc/agentsh/policy-reference.md +# +# Example (uncomment to use): +# +# version: 1 +# name: my-overrides +# file_rules: +# - name: allow-extra-write-area +# paths: ["/data/**"] +# operations: [write, create, mkdir, rename] +# decision: allow +``` + +- [ ] **Step 5: Create the SKILL.md** + +Create `docker/sbx-kit/files/workspace/.claude/skills/agentsh/SKILL.md`: + +```markdown +--- +name: agentsh +description: Use when the user asks about AgentSH policy, sandbox enforcement, audit events, or what file/network/command operations are allowed inside this Docker Sandbox. Read /run/agentsh/tier for the active enforcement mode, /etc/agentsh/policies/default.yaml for the merged active policy, and /home/agent/.agentsh/policy.yaml for the user-overlay fragment. +--- + +# AgentSH in this sandbox + +This sandbox has AgentSH installed via the Docker Sandboxes mixin kit. It +enforces a policy on file, network, command, and signal operations performed +by you and your subprocesses. + +## Inspect the live state + +| Question | Run | +|---|---| +| What enforcement tier is active? | `cat /run/agentsh/tier` (one of `shim`, `none`) | +| What policy is being enforced right now? | `cat /etc/agentsh/policies/default.yaml` | +| What are my overrides on top of the baked policy? | `cat /home/agent/.agentsh/policy.yaml` | +| Is the daemon running? | `pgrep -af 'agentsh server'` | +| Full grammar reference | `cat /usr/share/doc/agentsh/policy-reference.md` | + +## Extend the policy + +Write a partial YAML policy to `/home/agent/.agentsh/policy.yaml`. The +bootstrap merges it on top of the baked `coding-agent` template on the next +sandbox start. Rules that share a `name` with a baked rule replace it; +rules with new names append. 
+ +Minimal example: + +```yaml +version: 1 +name: my-overrides +file_rules: + - name: allow-data-area + paths: ["/data/**"] + operations: [write, create, mkdir, rename] + decision: allow +``` + +Restart the sandbox via Docker Sandboxes to pick up the change. In-place +reload is not supported in v1. + +## Common patterns + +- Let the agent write outside `/workspace`: add a `file_rules` entry with `decision: allow` for the new paths. +- Block a command unconditionally: add a `command_rules` entry with `decision: deny`. +- Soft-delete instead of hard-delete on a path: `decision: soft_delete` in a `file_rules` entry for `delete`/`rmdir` operations. +- Audit (don't block) a pattern: `decision: audit`. + +For the full grammar — every field, every decision value, available +templating variables — read `/usr/share/doc/agentsh/policy-reference.md`. + +## When the tier is `none` + +That means the bootstrap couldn't confirm the shim PATH made it past the +agent's entrypoint, OR the daemon failed to start. Check +`/var/log/agentsh/bootstrap.log` and `/var/log/agentsh/daemon.log` for the +reason. The agent will continue to run — AgentSH never blocks the agent's +startup — but enforcement is degraded to advisory. +``` + +- [ ] **Step 6: Create the smoke test script** + +Create `docker/sbx-kit/tests/coding-agent-smoke.sh`: + +```bash +#!/usr/bin/env bash +# Manual smoke test exercised inside a Docker Sandbox that has the AgentSH +# mixin kit installed. Run via: +# sbx exec bash /workspace/.claude/skills/agentsh/coding-agent-smoke.sh +# +# Or copy this file into the sandbox manually and run it as the agent user. +# +# Each check prints PASS / FAIL. Exits non-zero on any FAIL. 
+ +set -u + +pass=0 +fail=0 + +assert() { + local label="$1" + local got="$2" + local want="$3" + if [ "$got" = "$want" ]; then + echo "PASS: $label" + pass=$((pass+1)) + else + echo "FAIL: $label (got=%q want=%q)" + printf 'FAIL: %s (got=%q, want=%q)\n' "$label" "$got" "$want" >&2 + fail=$((fail+1)) + fi +} + +assert_contains() { + local label="$1" + local got="$2" + local want="$3" + if printf '%s' "$got" | grep -q -- "$want"; then + echo "PASS: $label" + pass=$((pass+1)) + else + echo "FAIL: $label (output did not contain $want)" + echo "----- got: -----" + printf '%s\n' "$got" + echo "----------------" + fail=$((fail+1)) + fi +} + +# Check 1: tier file says shim +got=$(cat /run/agentsh/tier 2>/dev/null || echo missing) +assert "tier file = shim" "$got" "shim" + +# Check 2: curl resolves under the shim dir +resolved=$(command -v curl) +assert_contains "curl resolves under shim dir" "$resolved" "/usr/lib/agentsh/shims" + +# Check 3: cat ~/.ssh/id_rsa is denied (no such file is fine; we expect either ENOENT or EACCES via deny) +mkdir -p "$HOME/.ssh" +printf 'fake-key\n' > "$HOME/.ssh/id_rsa.smoke" +out=$(cat "$HOME/.ssh/id_rsa.smoke" 2>&1) && rc=0 || rc=$? +rm -f "$HOME/.ssh/id_rsa.smoke" +if [ "$rc" -ne 0 ]; then + echo "PASS: cat ~/.ssh/id_rsa.smoke denied (rc=$rc)" + pass=$((pass+1)) +else + echo "FAIL: cat ~/.ssh/id_rsa.smoke succeeded — deny rule did not fire" + echo "----- got: -----" + echo "$out" + echo "----------------" + fail=$((fail+1)) +fi + +# Check 4: sudo is denied +out=$(sudo whoami 2>&1) && rc=0 || rc=$? 
+if [ "$rc" -ne 0 ]; then + echo "PASS: sudo denied (rc=$rc)" + pass=$((pass+1)) +else + echo "FAIL: sudo succeeded — deny rule did not fire" + echo "----- got: -----" + echo "$out" + echo "----------------" + fail=$((fail+1)) +fi + +# Check 5: soft-delete on /workspace +mkdir -p /workspace +echo "$$" > /workspace/smoke.tmp +rm /workspace/smoke.tmp 2>/dev/null || true +if [ -f /workspace/smoke.tmp ]; then + echo "FAIL: /workspace/smoke.tmp still present after rm" + fail=$((fail+1)) +else + # Look for it in the trash directory + if find /var/lib/agentsh/trash -name smoke.tmp 2>/dev/null | grep -q smoke.tmp; then + echo "PASS: soft-delete recoverable" + pass=$((pass+1)) + else + echo "FAIL: soft-delete trash entry not found" + fail=$((fail+1)) + fi +fi + +echo +echo "summary: $pass pass, $fail fail" +exit $([ "$fail" -eq 0 ] && echo 0 || echo 1) +``` + +- [ ] **Step 7: Create the kit README** + +Create `docker/sbx-kit/README.md`: + +```markdown +# AgentSH mixin kit for Docker Sandboxes + +This is a [Docker Sandboxes mixin kit](https://docs.docker.com/ai/sandboxes/customize/kits/) +that installs [AgentSH](https://github.com/erans/agentsh) into any sandbox at +creation and routes the agent's command-level activity through a +coding-agent-tuned policy. + +## Use + +``` +sbx run --kit git+https://github.com/erans/agentsh.git#dir=docker/sbx-kit +``` + +Works with `claude`, `opencode`, `gemini`, and any agent kit derived from +`docker/sandbox-templates:shell-docker`. + +## Verify + +``` +sbx exec cat /run/agentsh/tier # expect: shim +sbx exec cat /etc/agentsh/policies/default.yaml +sbx exec pgrep -af 'agentsh server' +``` + +For a deeper smoke test, run `tests/coding-agent-smoke.sh` inside the +sandbox. + +## OpenCode / Gemini setup + +Claude Code auto-discovers `.claude/skills/agentsh/SKILL.md`. 
For other +agents, copy the SKILL into your agent's discovery path: + +``` +sbx exec cp /workspace/.claude/skills/agentsh/SKILL.md /workspace/AGENTS.md +``` + +(Or symlink, or merge with your own `AGENTS.md` — whatever fits your flow.) + +## Logs + +| File | Purpose | +|---|---| +| `/var/log/agentsh/bootstrap.log` | Startup banner, policy-merge result, tier-probe result | +| `/var/log/agentsh/daemon.log` | Daemon stdout+stderr | + +## v1 enforcement tier + +v1 ships shim-tier interception only: subprocess execs of common commands +are routed through AgentSH's shim binary. LD_PRELOAD and ptrace tiers are +planned (see the spec under +`docs/superpowers/specs/2026-05-11-docker-sandboxes-mixin-kit-design.md`). + +## Override the policy + +Write a partial YAML policy to `/home/agent/.agentsh/policy.yaml` inside the +sandbox. See `/usr/share/doc/agentsh/policy-reference.md` for the grammar. +Restart the sandbox to apply. +``` + +- [ ] **Step 8: Run the kit test** + +Run: `go test ./docker/sbx-kit/... -v` +Expected: PASS. + +- [ ] **Step 9: Run the full test suite to confirm nothing else broke** + +Run: `go test ./... -count=1 -short` +Expected: PASS (or pre-existing flakes; new tests must pass). + +- [ ] **Step 10: Commit** + +```bash +git add docker/sbx-kit/ +git commit -m "sbx: Docker Sandboxes mixin kit at docker/sbx-kit/" +``` + +--- + +## Task 10: Publish install.sh via the release workflow + +**Files:** +- Modify: `.github/workflows/release.yml` +- Modify: `.goreleaser.yml` + +The mixin's `install` step curls `https://github.com/erans/agentsh/releases/latest/download/install.sh`. For that URL to resolve, the release pipeline must upload `scripts/install-agentsh.sh` as a release asset on every tag. GoReleaser's `release.extra_files` is the right hook. + +- [ ] **Step 1: Add install.sh as a release extra file** + +In `.goreleaser.yml`, find the `release:` top-level key (or add one if missing — it sits as a sibling of `nfpms:`, `archives:`, `checksum:`). 
Add or extend the `extra_files` list: + +```yaml +release: + extra_files: + - glob: scripts/install-agentsh.sh + name_template: install.sh +``` + +If the `release:` block doesn't exist yet, add the block at the end of `.goreleaser.yml` (before any closing). If it exists with other settings, just add the `extra_files` key. + +- [ ] **Step 2: Validate goreleaser config** + +Run: `goreleaser check` +Expected: PASS. + +- [ ] **Step 3: Verify the workflow exercises the asset path** + +Read `.github/workflows/release.yml`. Confirm it runs `goreleaser release` (it does — search for `goreleaser`). No further edits should be required; the `extra_files` setting plumbs into `goreleaser release` automatically. + +- [ ] **Step 4: Local snapshot test** + +Run: +``` +goreleaser release --snapshot --clean --skip=publish +``` +Look in `dist/` for `install.sh`. Expected: file present alongside `.deb`/`.rpm`/`.apk` artifacts. + +- [ ] **Step 5: Commit** + +```bash +git add .goreleaser.yml +git commit -m "release: publish install.sh as a release asset for the sbx mixin kit" +``` + +--- + +## Task 11: Final integration and end-to-end build verification + +**Files:** +- (verification only — no source changes expected) + +This task is the verification gate before merging. Run all the things that should now be green. + +- [ ] **Step 1: Full Go test suite** + +Run: `go test ./... -count=1` +Expected: PASS for everything new; pre-existing flakes (FlushLoop, TransportLoss) may show but should be documented as known. + +- [ ] **Step 2: Cross-compile verification** + +Run: `GOOS=windows go build ./...` +Expected: PASS. The bootstrap binary is Linux-only by goreleaser config; verify it doesn't break the Windows build by accident. 
+ +- [ ] **Step 3: Snapshot release** + +Run: `goreleaser release --snapshot --clean --skip=publish` +Expected: `dist/` contains: +- `agentsh-sbx-bootstrap` binaries (linux amd64+arm64) +- `agentsh_<version>_linux_amd64.deb` (and arm64, rpm, apk, archlinux variants) +- `install.sh` + +- [ ] **Step 4: Inspect a .deb to confirm payload layout** + +Run: `dpkg-deb -c dist/agentsh_*_linux_amd64.deb | grep -E '/(usr/lib/agentsh/shims|usr/share/agentsh|usr/bin/agentsh-sbx-bootstrap)'` +Expected: lists the new shim symlinks, `coding-agent.template.yaml`, and the bootstrap binary. + +- [ ] **Step 5: Manual sandbox validation matrix** + +Run the matrix from spec §11 against a live Docker Sandboxes install. Each agent gets `--kit git+https://github.com/erans/agentsh.git#dir=docker/sbx-kit&ref=<tag>`: + +``` +sbx run claude --kit git+...#dir=docker/sbx-kit +sbx run opencode --kit git+...#dir=docker/sbx-kit +sbx run gemini --kit git+...#dir=docker/sbx-kit +``` + +For each, run `tests/coding-agent-smoke.sh` and record results. Pass criteria: +- tier=shim +- curl resolves under `/usr/lib/agentsh/shims/` +- `cat ~/.ssh/id_rsa.smoke` denied +- `sudo whoami` denied +- soft-delete recoverable + +If any agent kit fails the matrix, file a follow-up task and document the failure mode in the kit README's "Known limitations" section before tagging the release. + +- [ ] **Step 6: Final commit (if any docs updated during validation)** + +```bash +git add -A +git status +git commit -m "sbx: validation results from manual matrix" || echo "nothing to commit" +``` + +--- + +## Self-review + +Coverage check against the spec: +- §1 Goal — Tasks 9, 10, 11 (kit tree + git URL invocation working end-to-end). ✅ +- §2 Background — informational, not implemented. ✅ +- §3 Non-goals — preserved as comments/scope in Tasks 5 and the kit README. ✅ +- §4 Enforcement model (tier 1 shim) — Tasks 5, 9. Tiers 2/3 explicitly parked. ✅ +- §5 Kit layout — Task 9. ✅ +- §6 spec.yaml — Task 9. 
✅ +- §7 Install + startup flow — Tasks 3 (merge), 4 (daemon spawn + socket wait), 5 (tier probe + file). ✅ +- §8 Default policy — Task 1. ✅ +- §9 Self-teaching docs — Tasks 7 (policy-reference.md), 9 (SKILL.md + README). ✅ +- §10 Prerequisites — Tasks 1 (#5 template), 2 (#4 merge helper), 3-5 (#3 bootstrap), 6 (#2 packaging), 7 (#6 docs), 8 (#1 install.sh), 10 (#1 release upload). ✅ +- §11 Validation — Task 11 step 5 covers the manual matrix. ✅ +- §12 Risk register — mitigations live in §7 of the spec and are inherent in the bootstrap design; no separate task. ✅ +- §13 Out of scope — preserved across the tier-name string, the kit README, and the bootstrap probe stub. ✅ + +Placeholder scan: no `TBD`, `TODO`, or "fill in later" in the plan. ✅ + +Type consistency: `mergeAndWritePolicy(tmpl, overlay, out string)`, `MergeOverlay(base, overlay *Policy) *Policy`, `probeShimTier(shimDir string) (bool, string, error)`, `writeTierFile(path, tier string) error`, `spawnDaemon(bin string, args []string, logPath string) (*exec.Cmd, error)`, `waitForSocket(sockPath string, deadline time.Duration) error` — all consistent across Tasks 3-5. ✅ + +Plan covers the full spec; no gaps. diff --git a/docs/superpowers/plans/2026-05-11-sbx-agent-wrap.md b/docs/superpowers/plans/2026-05-11-sbx-agent-wrap.md new file mode 100644 index 000000000..effe3b85b --- /dev/null +++ b/docs/superpowers/plans/2026-05-11-sbx-agent-wrap.md @@ -0,0 +1,802 @@ +# Auto-wrap Agent Harness Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** After the Docker Sandboxes mixin kit installs, the agent harness (`claude`, `opencode`, `gemini`, `codex`, `cursor`) launches under `agentsh wrap` via `/usr/local/bin/` symlink wrappers. 
Fail-CLOSED: if `agentsh wrap` can't engage cleanly, the wrapper refuses to launch the agent. + +**Architecture:** Two new shell scripts ship in every `.deb`/`.rpm`/`.apk` at `/usr/lib/agentsh/`: `agent-wrap` (the wrapper that runs `agentsh wrap -- /usr/bin/<agent>`) and `install-agent-wrappers.sh` (probes `/usr/bin` for known agents and symlinks `/usr/local/bin/<agent> -> /usr/lib/agentsh/agent-wrap`). The kit's `spec.yaml install:` block invokes the installer after `install.sh` finishes. PATH precedence (`/usr/local/bin` before `/usr/bin`) makes the agent kit's `exec claude` resolve to our wrapper. + +**Tech Stack:** POSIX shell (no bashisms — Alpine sandboxes use busybox sh), GoReleaser nfpms for packaging, Docker Sandboxes `spec.yaml` schema v1. + +**Spec reference:** `docs/superpowers/specs/2026-05-11-sbx-agent-wrap.md`. + +--- + +## Task 1: Wrapper script (`agent-wrap.sh`) + tests + +**Files:** +- Create: `packaging/agent-wrap.sh` +- Create: `packaging/agent-wrap_test.sh` + +The wrapper is the brain of this feature. It runs per-agent-launch and decides whether to engage `agentsh wrap` or refuse. TDD via a self-contained shell test that stubs the real binary, `agentsh`, and the tier file. + +- [ ] **Step 1: Write the failing test** + +Create `packaging/agent-wrap_test.sh`: + +```bash +#!/usr/bin/env bash +# Smoke test for packaging/agent-wrap.sh. Sets up an isolated tempdir with a +# fake agent binary, fake agentsh, and fake tier file, then drives the wrapper +# through all 5 scenarios. + +set -euo pipefail + +here=$(cd "$(dirname "$0")" && pwd) +wrap="$here/agent-wrap.sh" + +if [ ! -x "$wrap" ]; then + echo "FAIL: $wrap missing or not executable" + exit 1 +fi + +# Create an isolated harness. +tmp=$(mktemp -d -t agent-wrap-test.XXXXXX) +trap 'rm -rf "$tmp"' EXIT + +mkdir -p "$tmp/usr/bin" "$tmp/usr/local/bin" "$tmp/agentsh-bin" "$tmp/run/agentsh" + +# Fake real agent binary that announces itself. 
+cat >"$tmp/usr/bin/claude" <<'EOF' +#!/bin/sh +echo "REAL-CLAUDE: $*" +EOF +chmod +x "$tmp/usr/bin/claude" + +# Fake agentsh that announces itself when called as `agentsh wrap`. +cat >"$tmp/agentsh-bin/agentsh" <<'EOF' +#!/bin/sh +echo "AGENTSH-WRAP: $*" +EOF +chmod +x "$tmp/agentsh-bin/agentsh" + +# Symlink the wrapper as if installed. +ln -s "$wrap" "$tmp/usr/local/bin/claude" + +# Helper: run the symlinked wrapper with an overridden FAKE_ROOT (the wrapper +# reads FAKE_ROOT to relocate /usr/bin, /run/agentsh, etc. — see Task 1 Step 3 +# for how this hook is wired). +run_wrap() { + FAKE_ROOT="$tmp" PATH="$tmp/agentsh-bin:$PATH" "$tmp/usr/local/bin/claude" "$@" +} + +run_wrap_no_agentsh() { + # Restrict PATH so `command -v agentsh` fails. + FAKE_ROOT="$tmp" PATH="/usr/bin:/bin" "$tmp/usr/local/bin/claude" "$@" +} + +# Test 1: real binary missing → exit 127 +rm "$tmp/usr/bin/claude" +out=$(run_wrap --version 2>&1) && rc=0 || rc=$? +if [ "$rc" -ne 127 ]; then + echo "FAIL: missing-real-binary should exit 127; got rc=$rc out=$out" + exit 1 +fi +# Restore for subsequent tests. +cat >"$tmp/usr/bin/claude" <<'EOF' +#!/bin/sh +echo "REAL-CLAUDE: $*" +EOF +chmod +x "$tmp/usr/bin/claude" +echo "PASS: missing-real-binary exits 127" + +# Test 2: agentsh missing → exit 1 +echo "shim" >"$tmp/run/agentsh/tier" +out=$(run_wrap_no_agentsh --version 2>&1) && rc=0 || rc=$? +if [ "$rc" -ne 1 ]; then + echo "FAIL: missing-agentsh should exit 1; got rc=$rc out=$out" + exit 1 +fi +echo "PASS: missing-agentsh exits 1" + +# Test 3: tier=none → exit 1 +echo "none" >"$tmp/run/agentsh/tier" +out=$(run_wrap --version 2>&1) && rc=0 || rc=$? +if [ "$rc" -ne 1 ]; then + echo "FAIL: tier=none should exit 1; got rc=$rc out=$out" + exit 1 +fi +echo "PASS: tier=none exits 1" + +# Test 4: tier file missing → exit 1 +rm "$tmp/run/agentsh/tier" +out=$(run_wrap --version 2>&1) && rc=0 || rc=$? 
+if [ "$rc" -ne 1 ]; then + echo "FAIL: tier-missing should exit 1; got rc=$rc out=$out" + exit 1 +fi +echo "PASS: tier-missing exits 1" + +# Test 5: everything green → engages wrap with args preserved +echo "shim" >"$tmp/run/agentsh/tier" +out=$(run_wrap --version --foo bar 2>&1) +expected="AGENTSH-WRAP: wrap -- $tmp/usr/bin/claude --version --foo bar" +if [ "$out" != "$expected" ]; then + echo "FAIL: engage path wrong" + echo " want: $expected" + echo " got: $out" + exit 1 +fi +echo "PASS: engages wrap with args" + +echo +echo "OK agent-wrap.sh (5/5)" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `chmod +x packaging/agent-wrap_test.sh && ./packaging/agent-wrap_test.sh` +Expected: FAIL with "agent-wrap.sh missing or not executable". + +- [ ] **Step 3: Implement the wrapper** + +Create `packaging/agent-wrap.sh`: + +```sh +#!/bin/sh +# /usr/lib/agentsh/agent-wrap — invoked via symlinks at /usr/local/bin/<agent>. +# Routes the agent through `agentsh wrap`. Fail-CLOSED: any health-check +# failure refuses the launch with a non-zero exit and a stderr message. +# +# This deviates from the parent kit's "never brick the sandbox" posture +# (parent spec §7) because this kit's purpose IS enforcement; running +# unenforced when the operator asked for enforcement is the worse failure. +# +# FAKE_ROOT is a TEST-ONLY hook: when set, /usr/bin and /run/agentsh paths +# are relocated under that root. Production must NOT set FAKE_ROOT. + +set -u + +# Test hook. Empty in production. +FAKE_ROOT="${FAKE_ROOT:-}" + +name=$(basename "$0") +real="${FAKE_ROOT}/usr/bin/$name" +tier_file="${FAKE_ROOT}/run/agentsh/tier" + +if [ ! -x "$real" ]; then + echo "agentsh-agent-wrap: real binary not found at $real" >&2 + exit 127 +fi + +if ! 
command -v agentsh >/dev/null 2>&1; then + echo "agentsh-agent-wrap: agentsh binary missing; refusing to launch $name without enforcement" >&2 + exit 1 +fi + +tier=$(cat "$tier_file" 2>/dev/null || echo missing) +if [ "$tier" != "shim" ]; then + echo "agentsh-agent-wrap: enforcement not active (tier='$tier'); refusing to launch $name" >&2 + exit 1 +fi + +exec agentsh wrap -- "$real" "$@" +``` + +```bash +chmod +x packaging/agent-wrap.sh +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `./packaging/agent-wrap_test.sh` +Expected: PASS — all 5 checks. + +- [ ] **Step 5: Run shellcheck** + +Run: `shellcheck packaging/agent-wrap.sh packaging/agent-wrap_test.sh` +Expected: no errors. + +- [ ] **Step 6: Commit** + +```bash +git add packaging/agent-wrap.sh packaging/agent-wrap_test.sh +git commit -m "packaging: agent-wrap.sh — engage \`agentsh wrap\` on launch (fail-closed)" +``` + +--- + +## Task 2: Installer script (`install-agent-wrappers.sh`) + tests + +**Files:** +- Create: `packaging/install-agent-wrappers.sh` +- Create: `packaging/install-agent-wrappers_test.sh` + +The installer runs per-sandbox in the kit's `install` step. It probes for known agents in `/usr/bin` and creates symlinks at `/usr/local/bin/<agent>` to `/usr/lib/agentsh/agent-wrap`. Skips on conflict, idempotent. + +- [ ] **Step 1: Write the failing test** + +Create `packaging/install-agent-wrappers_test.sh`: + +```bash +#!/usr/bin/env bash +# Smoke test for packaging/install-agent-wrappers.sh. +# Drives the installer through 6 scenarios using a FAKE_ROOT. + +set -euo pipefail + +here=$(cd "$(dirname "$0")" && pwd) +installer="$here/install-agent-wrappers.sh" + +if [ ! 
-x "$installer" ]; then + echo "FAIL: $installer missing or not executable" + exit 1 +fi + +setup_root() { + local root="$1" + rm -rf "$root" + mkdir -p "$root/usr/bin" "$root/usr/local/bin" "$root/usr/lib/agentsh" + cat >"$root/usr/lib/agentsh/agent-wrap" <<'EOF' +#!/bin/sh +exit 0 +EOF + chmod +x "$root/usr/lib/agentsh/agent-wrap" +} + +# Test 1: no agents present → no symlinks created +tmp=$(mktemp -d -t install-wrappers-1.XXXXXX); trap 'rm -rf "$tmp"' EXIT +setup_root "$tmp" +FAKE_ROOT="$tmp" "$installer" >/dev/null 2>&1 +if [ -n "$(ls -A "$tmp/usr/local/bin")" ]; then + echo "FAIL: empty /usr/bin should produce no symlinks; found: $(ls "$tmp/usr/local/bin")" + exit 1 +fi +echo "PASS: no agents → no symlinks" + +# Test 2: one agent present → one symlink to agent-wrap +setup_root "$tmp" +touch "$tmp/usr/bin/claude"; chmod +x "$tmp/usr/bin/claude" +FAKE_ROOT="$tmp" "$installer" >/dev/null 2>&1 +link_target=$(readlink "$tmp/usr/local/bin/claude" 2>/dev/null || echo MISSING) +if [ "$link_target" != "$tmp/usr/lib/agentsh/agent-wrap" ]; then + echo "FAIL: one-agent case: link=$link_target" + exit 1 +fi +echo "PASS: one agent → one symlink" + +# Test 3: multiple agents → all wrapped +setup_root "$tmp" +for a in claude opencode gemini; do + touch "$tmp/usr/bin/$a"; chmod +x "$tmp/usr/bin/$a" +done +FAKE_ROOT="$tmp" "$installer" >/dev/null 2>&1 +for a in claude opencode gemini; do + if [ ! -L "$tmp/usr/local/bin/$a" ]; then + echo "FAIL: $a was not wrapped" + exit 1 + fi +done +echo "PASS: multiple agents → all wrapped" + +# Test 4: pre-existing entry skipped (file) +setup_root "$tmp" +touch "$tmp/usr/bin/claude"; chmod +x "$tmp/usr/bin/claude" +echo "preexisting" >"$tmp/usr/local/bin/claude" +out=$(FAKE_ROOT="$tmp" "$installer" 2>&1) +if [ ! 
-f "$tmp/usr/local/bin/claude" ] || [ -L "$tmp/usr/local/bin/claude" ]; then + echo "FAIL: pre-existing file at /usr/local/bin/claude was overwritten" + exit 1 +fi +content=$(cat "$tmp/usr/local/bin/claude") +if [ "$content" != "preexisting" ]; then + echo "FAIL: pre-existing file content changed; got: $content" + exit 1 +fi +if ! echo "$out" | grep -q "exists; not overwriting"; then + echo "FAIL: expected 'exists; not overwriting' message; got: $out" + exit 1 +fi +echo "PASS: pre-existing file skipped with warning" + +# Test 5: missing wrap script → exit 0, warning, no symlinks +setup_root "$tmp" +rm "$tmp/usr/lib/agentsh/agent-wrap" +touch "$tmp/usr/bin/claude"; chmod +x "$tmp/usr/bin/claude" +out=$(FAKE_ROOT="$tmp" "$installer" 2>&1) && rc=0 || rc=$? +if [ "$rc" -ne 0 ]; then + echo "FAIL: missing-wrap should exit 0 (fail-open); got rc=$rc" + exit 1 +fi +if [ -n "$(ls -A "$tmp/usr/local/bin")" ]; then + echo "FAIL: missing-wrap should produce no symlinks" + exit 1 +fi +if ! echo "$out" | grep -q "agent-wrap missing"; then + echo "FAIL: expected 'agent-wrap missing' warning; got: $out" + exit 1 +fi +echo "PASS: missing-wrap exits 0 with warning, no symlinks" + +# Test 6: idempotent (run twice on multi-agent setup → same end state) +setup_root "$tmp" +for a in claude opencode; do + touch "$tmp/usr/bin/$a"; chmod +x "$tmp/usr/bin/$a" +done +FAKE_ROOT="$tmp" "$installer" >/dev/null 2>&1 +state1=$(ls -la "$tmp/usr/local/bin" | sort) +FAKE_ROOT="$tmp" "$installer" >/dev/null 2>&1 +state2=$(ls -la "$tmp/usr/local/bin" | sort) +if [ "$state1" != "$state2" ]; then + echo "FAIL: not idempotent" + diff <(echo "$state1") <(echo "$state2") || true + exit 1 +fi +echo "PASS: idempotent" + +echo +echo "OK install-agent-wrappers.sh (6/6)" +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `chmod +x packaging/install-agent-wrappers_test.sh && ./packaging/install-agent-wrappers_test.sh` +Expected: FAIL with "install-agent-wrappers.sh missing or not executable". 
+ +- [ ] **Step 3: Implement the installer** + +Create `packaging/install-agent-wrappers.sh`: + +```sh +#!/bin/sh +# /usr/lib/agentsh/install-agent-wrappers.sh +# Probe /usr/bin for known agent binaries and create /usr/local/bin/<agent> +# symlinks pointing at /usr/lib/agentsh/agent-wrap. Skips when: +# - the agent binary isn't present (nothing to wrap) +# - /usr/local/bin/<agent> already exists (don't fight the agent kit) +# +# Idempotent. Fail-open if the wrap script itself is missing (warns, exits 0, +# leaves /usr/local/bin untouched). +# +# FAKE_ROOT is a TEST-ONLY hook: when set, all paths are relocated under it. +# Production must NOT set FAKE_ROOT. + +set -eu + +FAKE_ROOT="${FAKE_ROOT:-}" +WRAP="${FAKE_ROOT}/usr/lib/agentsh/agent-wrap" +DEST="${FAKE_ROOT}/usr/local/bin" +BIN="${FAKE_ROOT}/usr/bin" + +# Known agent binaries. Extend this list as Docker Sandboxes adds support. +AGENTS="claude opencode gemini codex cursor" + +if [ ! -x "$WRAP" ]; then + echo "install-agent-wrappers: agent-wrap missing at $WRAP; skipping (kit still works without auto-wrap)" >&2 + exit 0 +fi + +mkdir -p "$DEST" + +for agent in $AGENTS; do + if [ ! -x "$BIN/$agent" ]; then + continue + fi + target="$DEST/$agent" + if [ -e "$target" ] || [ -L "$target" ]; then + echo "install-agent-wrappers: $target exists; not overwriting" >&2 + continue + fi + ln -s "$WRAP" "$target" + echo "install-agent-wrappers: wrapped $agent" +done +``` + +```bash +chmod +x packaging/install-agent-wrappers.sh +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `./packaging/install-agent-wrappers_test.sh` +Expected: PASS — 6/6. + +- [ ] **Step 5: Run shellcheck** + +Run: `shellcheck packaging/install-agent-wrappers.sh packaging/install-agent-wrappers_test.sh` +Expected: no errors. 
+ +- [ ] **Step 6: Commit** + +```bash +git add packaging/install-agent-wrappers.sh packaging/install-agent-wrappers_test.sh +git commit -m "packaging: install-agent-wrappers.sh — symlink /usr/local/bin/ on install" +``` + +--- + +## Task 3: Package the new scripts via `.goreleaser.yml` + +**Files:** +- Modify: `.goreleaser.yml` + +Add two `nfpms.contents` entries so `agent-wrap` and `install-agent-wrappers.sh` ship in every .deb/.rpm/.apk. + +- [ ] **Step 1: Locate the insertion point** + +Read `.goreleaser.yml`. Find the existing `nfpms.contents` entry for `bash_startup.sh`: + +``` +grep -nE "bash_startup|/usr/lib/agentsh" .goreleaser.yml +``` + +The new entries go alongside the existing `/usr/lib/agentsh/bash_startup.sh` line. + +- [ ] **Step 2: Add the entries** + +In `.goreleaser.yml`, find the line containing `- src: packaging/bash_startup.sh` and add immediately after the block ending with that entry's `mode: 0755`: + +```yaml + # Auto-wrap agent harness (paired with the Docker Sandboxes mixin kit). + - src: packaging/agent-wrap.sh + dst: /usr/lib/agentsh/agent-wrap + file_info: + mode: 0755 + - src: packaging/install-agent-wrappers.sh + dst: /usr/lib/agentsh/install-agent-wrappers.sh + file_info: + mode: 0755 +``` + +- [ ] **Step 3: Validate the config** + +Run: `goreleaser check` +Expected: PASS, no warnings. + +- [ ] **Step 4: Snapshot-build to confirm packaging** + +Run: `goreleaser build --snapshot --clean --single-target --id sbx-bootstrap-linux 2>&1 | tail -10` +Expected: success. + +If a full nfpms snapshot build is feasible in the local env (CGO + libseccomp setup), run: +``` +goreleaser release --snapshot --clean --skip=publish +``` +and confirm a `.deb` contains the two new files: +``` +dpkg-deb -c dist/agentsh_*_linux_amd64.deb | grep -E '/usr/lib/agentsh/(agent-wrap|install-agent-wrappers)' +``` +Expected: both lines present. If the snapshot build fails for unrelated CGO reasons, `goreleaser check` is the authoritative gate. 
+ +- [ ] **Step 5: Commit** + +```bash +git add .goreleaser.yml +git commit -m "release: package agent-wrap.sh and install-agent-wrappers.sh" +``` + +--- + +## Task 4: Wire the installer into the kit's `spec.yaml` + +**Files:** +- Modify: `docker/sbx-kit/spec.yaml` + +Add a second `install` command that runs `install-agent-wrappers.sh` after `install.sh` finishes. No `environment.variables` block — the v1 design unconditionally engages wrap. + +- [ ] **Step 1: Read the current spec.yaml** + +Run: `cat docker/sbx-kit/spec.yaml`. Confirm there is exactly one entry under `commands.install`. + +- [ ] **Step 2: Add the second install command** + +Edit `docker/sbx-kit/spec.yaml`. Under `commands.install:`, after the existing `curl install.sh | sh` entry, append: + +```yaml + - command: ["/usr/lib/agentsh/install-agent-wrappers.sh"] + user: "0" + description: Wrap detected agent binaries via /usr/local/bin/ symlinks +``` + +The full `commands.install` block should look like: + +```yaml + install: + - command: "/bin/sh -c 'curl -fsSL https://github.com/erans/agentsh/releases/latest/download/install.sh | sh'" + user: "0" + description: Install agentsh from the latest GitHub release + - command: ["/usr/lib/agentsh/install-agent-wrappers.sh"] + user: "0" + description: Wrap detected agent binaries via /usr/local/bin/ symlinks +``` + +- [ ] **Step 3: Commit** + +```bash +git add docker/sbx-kit/spec.yaml +git commit -m "sbx: wire install-agent-wrappers.sh into spec.yaml install step" +``` + +(The Go structural test that asserts on the new install command lands in Task 6.) + +--- + +## Task 5: Extend E2E test to cover wrapper engagement + +**Files:** +- Modify: `docker/sbx-kit/tests/run-e2e.sh` + +Add Check 8: after the existing 7 checks pass, install a fake `agentsh` binary that emits a recognizable marker, install a fake `/usr/bin/claude` stub, run the installer to create `/usr/local/bin/claude`, then invoke `claude` from a fresh login shell and assert the wrap marker appears. 
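+ +- [ ] **Step 1: Modify `docker/sbx-kit/tests/run-e2e.sh`** + +Read the file. Find the end of the existing check 7 block (the `user override stub present` check). After that block, BEFORE the `summary:` line, insert: + +```bash +# --------------------------------------------------------------------------- +# 8. Wrapper engagement check. +# Install a fake /usr/bin/claude stub, a fake `agentsh` that emits a +# recognizable marker, run the installer to create /usr/local/bin/claude, +# then invoke claude via a login shell and verify the wrap chain fired. +# --------------------------------------------------------------------------- + +log +log "Verifying agent wrap engagement:" + +# NOTE(review): the heredoc bodies and staging commands below were reconstructed +# from the step description after the original text was lost; confirm them +# against the real run-e2e.sh before relying on this snippet. +in_container ' +set -e + +# Fake agentsh that announces itself when called as `agentsh wrap`. +cat >/usr/bin/agentsh <<EOF +#!/bin/sh +echo "FAKE-AGENTSH-WRAP-MARKER: \$*" +EOF +chmod +x /usr/bin/agentsh + +# Fake agent binary for the installer to detect in /usr/bin. +cat >/usr/bin/claude <<EOF +#!/bin/sh +echo "REAL-CLAUDE: \$*" +EOF +chmod +x /usr/bin/claude + +# Stage the mounted packaging scripts at their packaged paths, then run the installer. +mkdir -p /usr/lib/agentsh +cp /sbx-e2e/agent-wrap /usr/lib/agentsh/agent-wrap +cp /sbx-e2e/install-agent-wrappers /usr/lib/agentsh/install-agent-wrappers.sh +chmod 0755 /usr/lib/agentsh/agent-wrap /usr/lib/agentsh/install-agent-wrappers.sh +/usr/lib/agentsh/install-agent-wrappers.sh +' + +out=$(in_container "bash -lc \"claude --version\"" 2>&1 || true) + +if printf '%s' "$out" | grep -q 'FAKE-AGENTSH-WRAP-MARKER: wrap -- /usr/bin/claude --version'; then + pass "wrapper engages \`agentsh wrap\` with args preserved" +else + fail "wrapper did not engage wrap (or args dropped)" + log "----- claude invocation output -----" + printf '%s\n' "$out" + log "------------------------------------" +fi +``` + +- [ ] **Step 2: Add the new host-side mounts** + +Earlier in `run-e2e.sh`, find the `docker run -d` block. 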
Add two `-v` flags so the host's packaging scripts are reachable inside the container at `/sbx-e2e/agent-wrap` and `/sbx-e2e/install-agent-wrappers`: + +```bash +docker run -d --rm --name "$CONTAINER" --user 0 \ + -v "$STAGE/bin:/sbx-e2e/bin:ro" \ + -v "$REPO/configs/policies/coding-agent.yaml:/sbx-e2e/coding-agent.yaml:ro" \ + -v "$REPO/packaging/config.yaml:/sbx-e2e/server-config.yaml:ro" \ + -v "$REPO/docker/sbx-kit/files:/sbx-e2e/kit-files:ro" \ + -v "$STAGE/home-overrides/policy.yaml:/sbx-e2e/user-override.yaml:ro" \ + -v "$REPO/packaging/agent-wrap.sh:/sbx-e2e/agent-wrap:ro" \ + -v "$REPO/packaging/install-agent-wrappers.sh:/sbx-e2e/install-agent-wrappers:ro" \ + "$IMAGE" \ + sleep 600 >/dev/null +``` + +- [ ] **Step 3: Run the E2E** + +Run: `bash docker/sbx-kit/tests/run-e2e.sh` +Expected: `summary: 8 pass, 0 fail` (was 7 pass, now 8). + +If check 8 fails on "args dropped," verify the bash login shell actually has `/usr/local/bin` ahead of `/usr/bin` in PATH inside the sandbox-template image. The default Debian-derived templates do. If not, the wrapper may need a more aggressive PATH-precedence mechanism — but that's a separate plan. + +- [ ] **Step 4: Commit** + +```bash +git add docker/sbx-kit/tests/run-e2e.sh +git commit -m "sbx: extend e2e to verify agent-wrap engagement" +``` + +--- + +## Task 6: Update Go structural test for the spec.yaml change + +**Files:** +- Modify: `docker/sbx-kit/spec_test.go` + +The existing `TestSpecYAML_InstallReferencesInstallScript` asserts exactly ONE install command. With Task 4 there are now two. Rewrite the assertion to require two and check both. 
+ +- [ ] **Step 1: Read the current test** + +Run: `grep -nA 15 'TestSpecYAML_InstallReferencesInstallScript' docker/sbx-kit/spec_test.go` + +- [ ] **Step 2: Update the test** + +Replace the body of `TestSpecYAML_InstallReferencesInstallScript` with: + +```go +func TestSpecYAML_InstallReferencesInstallScript(t *testing.T) { + s := loadSpec(t) + if len(s.Commands.Install) != 2 { + t.Fatalf("expected exactly two install commands, got %d", len(s.Commands.Install)) + } + + // First entry: curl install.sh | sh + first := s.Commands.Install[0].Command + if !strings.Contains(first, "install.sh") { + t.Errorf("first install command does not curl install.sh: %q", first) + } + if s.Commands.Install[0].User != "0" { + t.Errorf("first install user = %q, want %q (root)", s.Commands.Install[0].User, "0") + } + + // Second entry: install-agent-wrappers.sh + second := s.Commands.Install[1].Command + if !strings.Contains(second, "install-agent-wrappers.sh") { + t.Errorf("second install command does not invoke install-agent-wrappers.sh: %q", second) + } + if s.Commands.Install[1].User != "0" { + t.Errorf("second install user = %q, want %q (root)", s.Commands.Install[1].User, "0") + } +} +``` + +Note: `Command` on the install struct is currently a single `string`. The second install entry uses a YAML list form (`["/usr/lib/agentsh/install-agent-wrappers.sh"]`). YAML unmarshal of a list into a `string` field does not produce a string representation: Go YAML decoders such as `gopkg.in/yaml.v3` reject it with a type error (`cannot unmarshal !!seq into string`), so the spec will fail to load in the test. Either change the struct to accept both forms (e.g. via `yaml.Node`) or change the spec.yaml to use the single-string form: + +```yaml + - command: "/usr/lib/agentsh/install-agent-wrappers.sh" +``` + +This is uniform with the first entry. Prefer this — change spec.yaml in Task 4 to use the string form too if you haven't already, so the struct shape doesn't need to change. 
+ +- [ ] **Step 3: If needed, reconcile the YAML form** + +If you used the list form in Task 4, edit `docker/sbx-kit/spec.yaml` to use the string form: + +```yaml + - command: "/usr/lib/agentsh/install-agent-wrappers.sh" + user: "0" + description: Wrap detected agent binaries via /usr/local/bin/ symlinks +``` + +Functionally identical to the list form for Docker Sandboxes; consistent with the first entry; matches the test's `string` expectation. + +- [ ] **Step 4: Run the test** + +Run: `go test ./docker/sbx-kit/... -run TestSpecYAML_InstallReferencesInstallScript -v -count=1` +Expected: PASS. + +- [ ] **Step 5: Run the full sbx-kit + bootstrap suites** + +Run: `go test ./docker/sbx-kit/... ./cmd/agentsh-sbx-bootstrap/... -count=1` +Expected: PASS. + +- [ ] **Step 6: Commit** + +```bash +git add docker/sbx-kit/spec_test.go docker/sbx-kit/spec.yaml +git commit -m "sbx: extend spec_test.go to assert the second install command" +``` + +--- + +## Task 7: Documentation updates + +**Files:** +- Modify: `docker/sbx-kit/README.md` +- Modify: `docs/policy-reference.md` + +Document the new behavior in the kit README and add the two new paths to the policy reference table. + +- [ ] **Step 1: Update `docker/sbx-kit/README.md`** + +Read the current README. Find the section "## v1 enforcement tier" — insert a new "## Behavior: agent harness runs under `agentsh wrap`" section immediately after it (and before "## E2E test (no `sbx` required)"): + +```markdown +## Behavior: agent harness runs under `agentsh wrap` + +This kit runs the agent harness under `agentsh wrap` whenever it can. After +install, the kit creates symlinks at `/usr/local/bin/<agent>` (for known +agents present in the sandbox) that route launches through `agentsh wrap`, +giving you full exec-pipeline interception of every subprocess the agent +spawns, a coherent session, and a session report on exit. + +Wrapped agents (v1): `claude`, `opencode`, `gemini`, `codex`, `cursor`. 
The +installer skips agents whose binary isn't present in `/usr/bin/` and skips +any entry that already exists in `/usr/local/bin/` (never overwrites +something the agent kit shipped). + +### Fail-CLOSED deviation from the parent spec + +When the wrapper at `/usr/local/bin/<agent>` runs, it exits non-zero and +refuses to launch the agent if AgentSH cannot engage cleanly: the `agentsh` +binary is missing, `/run/agentsh/tier` does not read `shim`, or the tier +file is missing. Choosing this kit means choosing enforcement-mandatory +semantics; running unenforced is not a supported state. + +This deviates from the parent spec's §7 "never bricks the sandbox" stance. +The parent spec governs the kit's *bootstrap*; this section governs the +wrapper's behavior at *agent launch time*. + +### Known limitations + +- **Absolute-path entrypoints bypass the wrapper.** The mechanism relies on + `/usr/local/bin` preceding `/usr/bin` in PATH. An agent kit whose + entrypoint invokes `/usr/bin/claude` directly is unaffected by the + wrapper. Verify per agent kit before relying on auto-wrap. +- **Install-time failures pass through.** If the kit's `install` command + itself fails (curl 404, package install error), the wrappers are never + created and the agent runs unwrapped. `sbx run` should report this + failure visibly. +``` + +- [ ] **Step 2: Update `docs/policy-reference.md`** + +Read the existing "Where things live" table. Append two rows (preserving the table's pipe-style format): + +```markdown +| `/usr/lib/agentsh/agent-wrap` | OS package, read-only | Shared wrapper script for agent binaries | +| `/usr/local/bin/<agent>` | Kit install step | Symlink to agent-wrap (created per detected agent) | +``` + +These rows go at the end of the existing table. + +- [ ] **Step 3: Verify both docs render** + +Run: `wc -l docker/sbx-kit/README.md docs/policy-reference.md` +Expected: both files exist with non-zero line counts; visually scan the diff to confirm the additions land in the right place. 
+ +- [ ] **Step 4: Commit** + +```bash +git add docker/sbx-kit/README.md docs/policy-reference.md +git commit -m "docs: document agent-wrap behavior + fail-closed deviation" +``` + +--- + +## Self-Review + +**Spec coverage** against `docs/superpowers/specs/2026-05-11-sbx-agent-wrap.md`: + +- §1 Goal — Tasks 1+2+4 (wrapper + installer + spec.yaml wiring) plus Task 5 (e2e proves end-to-end). ✓ +- §2 Constraint reminder — informational, embodied in Task 5's reliance on PATH precedence (`bash -lc`). ✓ +- §3 Non-goals — no env var anywhere in any task. ✓ +- §4 Fail-closed table — Task 1 step 3's wrapper script implements all four rows. ✓ +- §5 Components — Tasks 1, 2 (scripts), Task 3 (packaging), Task 4 (spec.yaml), Task 5 (e2e). ✓ +- §6 Wrapper — Task 1. ✓ +- §7 Installer — Task 2. ✓ +- §8 Kit integration — Task 4. ✓ +- §9 Testing — Task 1 (5 wrapper tests), Task 2 (6 installer tests — one more than spec said because idempotent was a separate case worth covering), Task 5 (e2e check 8), Task 6 (Go structural test update). ✓ +- §10 Documentation — Task 7. ✓ +- §11 Risk register — informational; risks are mitigated through the e2e check and the explicit fail-closed posture documented in Task 7. ✓ +- §12 Out of scope — preserved by absence; no tasks for LD_PRELOAD/ptrace/etc. ✓ + +**Placeholder scan:** no `TBD`/`TODO`/"fill in" in any task. Each step has either runnable code, a runnable command, or an exact file edit. + +**Type consistency:** +- `FAKE_ROOT` env var: used in Task 1 (wrapper) and Task 2 (installer), same semantics. ✓ +- Agent list `claude opencode gemini codex cursor`: Task 2 installer, Task 5 e2e references just `claude`, Task 7 docs list all five. ✓ +- `/usr/lib/agentsh/agent-wrap` path: Tasks 1/2/3/7 all reference it consistently. ✓ +- `/run/agentsh/tier` path + value `"shim"`: wrapper script (Task 1) and e2e (Task 5) both check for the same string. 
✓ +- `agentsh wrap --` invocation: wrapper exec'd command (Task 1) matches the marker the e2e asserts (Task 5). ✓ + +No gaps found. Plan is ready. diff --git a/docs/superpowers/specs/2026-05-10-db-plan-04c-simple-query-events-design.md b/docs/superpowers/specs/2026-05-10-db-plan-04c-simple-query-events-design.md new file mode 100644 index 000000000..44a1a4d95 --- /dev/null +++ b/docs/superpowers/specs/2026-05-10-db-plan-04c-simple-query-events-design.md @@ -0,0 +1,497 @@ +# db-access Plan 04c — Simple Query + DBEvent Emission (design) + +Status: design approved 2026-05-10. Implementation plan to follow via writing-plans. + +Cross-references: +- Roadmap: `docs/superpowers/specs/2026-05-08-db-access-phase-1-roadmap-design.md` §3 Plan 04c. +- Plan 04 skeleton (parent design): `docs/superpowers/specs/2026-05-10-db-plan-04-pg-proxy-skeleton-design.md`. +- Plan 04b₂ (predecessor): `docs/superpowers/specs/2026-05-10-db-plan-04b2-upstream-passthrough-design.md`. +- Spec: `docs/agentsh-db-access-spec.md` v0.8 §7.1 (wire framing), §7.7 (search_path), §8 (DBEvent), §10.2 (most-restrictive), §10.3 (redaction tiers), §14.1 / §14.3 / §14.4 (Simple Query and pre-/in-tx deny semantics), §23.4 steps 5+7. +- Predecessors shipped: Plans 01 (effects), 02 (policy), 03 (classify/postgres), 04a (listener), 04b (handshake/TLS), 04b₂ (upstream wiring/passthrough/cancel). + +This document covers the package-shape, control-flow, schema, and test decisions for the sub-plan that closes the Phase 1 Simple Query loop. The skeleton design's §6 sketched it; this expands it with the choices settled during brainstorming. + +## 1. Scope + +### In scope + +- Continue the per-connection driver past the first upstream `ReadyForQuery`: from `forwardAuth`'s exit into a per-conn `simpleQueryLoop`. +- `'Q'`-frame handling: classify (Plan 03) → evaluate per statement (Plan 02) → forward-or-synthesize-deny → emit one `db_statement` event per `ClassifiedStatement`. 
+- Per-conn `lastUpstreamRFQ` status-byte tracker (`I` | `T` | `E` | 0), updated on every observed upstream `'Z'` frame. Drives deny synthesis. +- Multi-statement parse-all-before-forward: if any of N statements in a single `Q` body denies, forward none; emit N events. +- Per-frame demux of upstream response stream until trailing `'Z'`, accumulating `bytes_in`, `bytes_out`, `rows_returned`, `rows_affected`, `latency_ms`, `error_code` for `EventResult`. +- Deny synthesis: + - `lastUpstreamRFQ ∈ {0, 'I'}` (out-of-tx / pre-auth): `ErrorResponse` + `ReadyForQuery('I')` locally; loop continues. + - `lastUpstreamRFQ ∈ {'T', 'E'}` (in-tx): `ErrorResponse` only; close upstream + close client; `tx_context.deny_action = "connection_terminated"`. +- `approve` rule verb at runtime → synthesize `deny` with `error_code: APPROVE_NOT_YET_SUPPORTED`; emit a config-load warning when `Unavoidability != off` and any rule has `decision: approve`. +- Frame budget cap: `Q` body > `MaxQueryBytes` (default 1 MiB) → synthetic `ErrorResponse(54000, "statement too large for AgentSH proxy: N bytes > 1 MiB cap")` + `ReadyForQuery('I')` + close; emits a lifecycle event with `error_code: FRAME_TOO_LARGE`. +- Non-`'Q'` / non-`'X'` frame post-handshake → synthetic `ErrorResponse(0A000, "Extended Query / COPY / FunctionCall not supported in AgentSH proxy phase 1")` + close; lifecycle event `EXTENDED_QUERY_NOT_SUPPORTED` (or `FUNCTION_CALL_PROTOCOL_DENIED` for `'F'`). +- `events.DBEvent` extended with §8 sub-structs (`TLS`, `Decision`, `Result`, `TxContext`, `Predicates`). 04c populates `Decision`, `Result`, `TxContext.InTransaction`, `TxContext.DenyAction`, `Predicates.HasFilter`, `TLS.Mode`, `TLS.ClientSNI`. +- `Parser.Normalize(sql string) (string, error)` added to `internal/db/classify/postgres.Parser`. libpg_query backend calls `pg_query_normalize`; pure-Go backend uses a regex literal-scrubber. 
`statement_digest = sha256:` + hex(SHA-256(Normalize(sql))) for every tier — digest invariant under redaction. +- Per-dialect classifier map built in `postgres.Server.New()` keyed on `svc.Dialect`. Same dialect across services shares a `Parser` instance. Unexported test hook for fake injection. +- Hot-swappable policy: `Server.SetPolicy(*policy.RuleSet)` via `atomic.Pointer[policy.RuleSet]`. Each statement reads the snapshot once at evaluate time. No mid-statement swap. +- `effects.ClassifiedStatement` gains `SourceStart` / `SourceEnd` (byte offsets into the original `Q` body) for per-stmt text slicing under `RedactionFull`. libpg_query exposes `stmt_location` + `stmt_len`; pure-Go fallback re-uses its splitter. +- Spine integration test: real `jackc/pgx/v5` client → real `*Server` in `terminate_reissue` mode → fake upstream goroutine speaking `pgproto3.Backend`. Three subtests: allow, pre-tx deny, in-tx deny terminate. Adds `pgx` as test-only dep. + +### Out of scope (deferred) + +- Extended Query (`Parse`/`Bind`/`Describe`/`Execute`/`Sync`/`Flush`/`Close`), SQL-level prepared cache, COPY data-frame handling, FunctionCall sub-protocol semantics (we reject the frame; we do not honor it). — Plan 05. +- Full §14 deny modes (`rollback_then_continue`, `idle_until_next_simple_query`); `tx_context.tx_started_at`; `approve` runtime workflow; GSSENC opt-in; async LISTEN/NOTIFY push delivery between Q…Z round-trips. — Plan 05. +- `BackendKeyData` mapping table; cancel governance via mapping lookup. — Plan 06. +- Out-of-process proxy under distinct SessionID; SO_PEERCRED → SessionID resolution; unavoidability bundle; testcontainer integration suite; recommendation flip to `enforce`. — Plan 07. + +## 2. Architectural decisions + +**D1. Single per-conn driver, half-duplex.** Simple Query is half-duplex by spec (one `'Q'` → response frames → `'Z'`). 
One goroutine per connection drives `simpleQueryLoop`, which sequentially reads a client frame, dispatches `'Q'` / `'X'` / other, and on allow-forward runs `forwardUpstreamUntilRFQ` to read upstream frames one at a time. No fan-in goroutines, no shared state between client and upstream readers. Async upstream pushes outside a round-trip (LISTEN/NOTIFY) are documented as deferred to Plan 05's two-goroutine model. + +**D2. Statement digest is invariant under redaction.** `statement_digest = sha256:` + hex(SHA-256(Normalize(stmt))) for all three tiers (`full` / `parameters_redacted` / `none`). Operators integrating across deployments with different `LogStatements` settings can still join events on digest. Documented cross-implementation caveat: libpg_query and pure-Go `Normalize` outputs may differ, so digests are stable *within an implementation*, not across. + +**D3. `Normalize` on Parser, not in proxy.** `internal/db/classify/postgres.Parser` gains `Normalize(sql string) (string, error)`. libpg_query backend calls `pg_query_normalize`. Pure-Go backend does a regex literal-scrubber for `'…'` strings, `$tag$…$tag$` dollar-quotes, and numeric literals. On `unknown` classification the digest is computed off the verbatim trimmed SQL with a documented note. Avoids duplicating normalization logic in the proxy and keeps the classifier the single source of truth for SQL surface manipulation. + +**D4. Per-frame demux for upstream response.** The allow-forward path reads upstream frames one at a time via `pc.state.upstreamFE.Receive()` until `'Z'`. Forwards each frame to the client; accumulates result counters. The alternative (bytePump + 'Z' snoop) was rejected because it cannot populate `rows_returned` / `rows_affected` / `latency_ms` for the DBEvent, which the spec §8 result struct mandates. + +**D5. Full §8 DBEvent schema, partial population.** Plan 04c lands `Decision`, `Result`, `TxContext`, `Predicates`, `TLS` sub-structs on `events.DBEvent`. 
Populates what is knowable in 04c (RFQ-byte-only transaction state, no `tx_started_at`, no `rollback_injected`); Plan 05's state machine fills the rest. Schema-stable from 04c forward so downstream consumers (audit sinks, dashboards) are not on a moving target. + +**D6. Auto-built per-dialect classifier map.** `Server.New()` constructs a `map[string]postgres.Parser` from the dialects of declared `cfg.Services`. Same dialect across services shares one `Parser` (parsers are expensive on Linux+CGO). An unexported `classifierForTest` test hook overrides the map per-test. + +**D7. Hot-swappable policy via `atomic.Pointer`.** `Server.SetPolicy(*policy.RuleSet)` swaps the active rule set atomically. Each statement's classify+evaluate reads the current snapshot once; no mid-statement swap. Cheap, well-tested pattern elsewhere in the project. Supervisor reload paths plug in cleanly. + +**D8. Cross-plan touch on Plan 03's `ClassifiedStatement`.** Per-stmt text slicing under `RedactionFull` requires byte spans. We add `SourceStart` / `SourceEnd` to `effects.ClassifiedStatement` as part of 04c rather than retrofitting later. libpg_query's `stmt_location` + `stmt_len` make this a one-field surface bump. Pure-Go fallback re-uses its splitter. Accepted scope creep — duplicating split logic in the proxy would be worse. + +## 3. 
Package layout + +New files under `internal/db/proxy/postgres/` (all `//go:build linux`; the existing `stub_other.go` keeps non-Linux compiling): + +``` +internal/db/proxy/postgres/ +├── simplequery.go simpleQueryLoop; handleQuery; non-Q/non-X dispatch; MaxQueryBytes +├── upstreamread.go forwardUpstreamUntilRFQ; per-frame demux; counter accumulation; +│ CommandComplete tag parsing; per-stmt counter attribution +├── deny.go synthErrorAndRFQ; synthErrorOnly; pickDenySynth; SQLSTATE picker; +│ deny_message template substitution +├── eventbuilder.go buildStatementEvent (pure); redaction tier render; statement_digest; +│ per-stmt slice via ClassifiedStatement.SourceStart/SourceEnd +├── classifiers.go per-dialect Parser map; Server.classifierFor(dialect); +│ test hook plumbing +└── *_test.go unit + spine tests +``` + +Modified files: + +``` +internal/db/proxy/postgres/ +├── server.go Config gains MaxQueryBytes + classifierForTest; New() builds +│ dialect→Parser map and validates dialects; SetPolicy method; +│ policy() helper returning atomic snapshot +├── proxyconn.go connState gains lastUpstreamRFQ, redactionTier, tlsMode; +│ emit helpers for statement events + frame-too-large lifecycle event +├── handshake.go dialUpstreamAndForward, after forwardAuth returns successfully, +│ seeds lastUpstreamRFQ='I' and calls simpleQueryLoop instead of +│ returning nil +└── authforward.go forwardAuth writes the observed 'Z' status byte into connState +│ before returning (avoids a re-read in simpleQueryLoop) + +internal/db/classify/postgres/ +├── parser.go Parser interface gains Normalize(sql) (string, error) +├── libpgquery.go wire pg_query_normalize +├── wasm.go wire wasilibs/go-pgquery Normalize (or document regex fallback) +└── parser_normalize_test.go per-implementation + parity-on-curated-subset tests + +internal/db/effects/ +├── statement.go ClassifiedStatement gains SourceStart, SourceEnd (int byte offsets; +│ zero-valued when parser cannot supply them, e.g. 
unknown stmt) +└── statement_test.go coverage of the new fields + +internal/db/events/ +└── event.go DBEvent extended with TLS, Decision, Result, TxContext, Predicates + sub-structs; types defined in same file or split per taste +``` + +### Boundary calls + +- `internal/db/proxy/postgres` depends on `effects`, `policy`, `classify/postgres`, `events`, `service`, `tlsleaf` (already true in 04a/b/b₂). No new external deps in the production path. +- `pgx` is a **test-only** dep; added to `go.mod` under no build tag (Go's testing has no separate dep set) but only imported from `_test.go` files. `go mod tidy` must remain clean. +- `events.DBEvent` schema extension is consumed by callers in `internal/db/proxy/postgres/eventbuilder.go` only. No other package builds events today; safe. + +## 4. Public surface + +### `internal/db/proxy/postgres.Config` additions + +```go +type Config struct { + // existing fields unchanged... + + // MaxQueryBytes caps the 'Q' frame body length. Default 1 MiB when zero. + // Statements above the cap get a synthetic ErrorResponse(54000) + close. + MaxQueryBytes int + + // classifierForTest, when non-nil, overrides the per-dialect Parser map + // built by New(). Test-only — production callsites must leave this nil. + classifierForTest func(dialect string) postgres.Parser +} +``` + +### `Server` additions + +```go +func (s *Server) SetPolicy(rs *policy.RuleSet) // atomic.Pointer swap; nil → implicit-deny everywhere +func (s *Server) policy() *policy.RuleSet // unexported; reads atomic snapshot +func (s *Server) classifierFor(dialect string) postgres.Parser // unexported +``` + +`New(cfg)` additionally: + +1. Validates each `svc.Dialect` is recognized (`postgres`, `aurora_postgres`, `cockroachdb`, `redshift`); rejects unknowns with a clear error. +2. Builds a `map[string]postgres.Parser` keyed by dialect; shared across services with the same dialect. +3. Applies `MaxQueryBytes` default of 1 MiB when `cfg.MaxQueryBytes == 0`. +4. 
Stores `cfg.Policy` in the atomic pointer. + +### `connState` additions + +```go +type connState struct { + // existing fields unchanged... + lastUpstreamRFQ byte // 'I' | 'T' | 'E' | 0 + redactionTier policy.RedactionTier // resolved at handshake end from rs.Redaction().LogStatements + tlsMode string // svc.TLSMode at handshake end, for EventTLS.Mode +} +``` + +## 5. Simple Query control flow + +### Entry from `forwardAuth` + +`handshake.go::dialUpstreamAndForward` currently returns `nil` after `forwardAuth` returns on the first observed upstream `'Z'`. 04c changes the tail of that function to: + +```go +if err := forwardAuth(ctx, pc); err != nil { /* unchanged paths */ } +// forwardAuth wrote the observed 'Z' status byte into pc.state.lastUpstreamRFQ +// before returning, so we don't re-read. +pc.state.redactionTier = pc.srv.policy().Redaction().LogStatements +pc.state.tlsMode = pc.svc.TLSMode +return pc.simpleQueryLoop(ctx) +``` + +### `simpleQueryLoop` + +```go +func (pc *proxyConn) simpleQueryLoop(ctx context.Context) error { + for { + if err := ctx.Err(); err != nil { return err } + msg, err := pc.backend.Receive() + if err != nil { return err } // EOF / closed-pipe are normal terminations + switch m := msg.(type) { + case *pgproto3.Query: + if err := pc.handleQuery(ctx, m); err != nil { return err } + case *pgproto3.Terminate: + pc.state.upstreamFE.Send(m) + _ = pc.state.upstreamFE.Flush() + return nil + default: + return pc.handleUnsupportedFrame(ctx, m) + } + } +} +``` + +`pc.backend.Receive` is the existing pgproto3 Backend already wired in 04b. `MaxQueryBytes` enforcement happens at the `handleQuery` entry against `len(q.String)` (cheap; pgproto3 has already allocated the body, so this is mitigation-against-griefing rather than a hard pre-allocation ceiling — Phase 1 trades off the perfect ceiling for code simplicity. Documented limitation.). 
+ +### `handleQuery` + +```go +func (pc *proxyConn) handleQuery(ctx context.Context, q *pgproto3.Query) error { + if len(q.String) > pc.srv.cfg.MaxQueryBytes { + pc.emitFrameTooLarge(ctx, len(q.String)) + _ = pc.synthErrorAndRFQ("54000", frameTooLargeMsg(len(q.String))) + return errFrameTooLargeClose + } + parser := pc.srv.classifierFor(pc.svc.Dialect) + stmts, _ := parser.Classify(q.String, postgres.SessionState{}, postgres.Options{}) + rs := pc.srv.policy() + decisions := make([]policy.Decision, len(stmts)) + anyDeny := false + for i, s := range stmts { + decisions[i] = policy.Evaluate(s, rs, policy.ServiceID(pc.svc.Name)) + if decisions[i].Verb == policy.VerbApprove { + decisions[i] = synthApproveAsDeny(decisions[i]) // APPROVE_NOT_YET_SUPPORTED + } + if decisions[i].Verb == policy.VerbDeny { anyDeny = true } + } + + if !anyDeny { + sentAt := timeNow() + pc.state.upstreamFE.Send(q) + if err := pc.state.upstreamFE.Flush(); err != nil { return err } + result, err := pc.forwardUpstreamUntilRFQ(ctx, sentAt, len(q.String)) + pc.emitAllowEvents(ctx, stmts, decisions, q.String, result) + return err + } + + pc.emitDenyEvents(ctx, stmts, decisions, q.String) + rendered, denyCode := pickDenySynth(decisions) + switch pc.state.lastUpstreamRFQ { + case 0, 'I': + return pc.synthErrorAndRFQ(denyCode, rendered) + case 'T', 'E': + _ = pc.synthErrorOnly(denyCode, rendered) + return errInTxTerminate + default: + return fmt.Errorf("postgres.handleQuery: unexpected RFQ byte %q", pc.state.lastUpstreamRFQ) + } +} +``` + +`SessionState{}` is the empty session state; 04c does not track `SET search_path` / `SET ROLE`. Per spec §7.7 unqualified objects under no search_path resolve to `object_resolution=unresolved`, which is the Plan 03 corpus expectation. 
+ +### `forwardUpstreamUntilRFQ` (in `upstreamread.go`) + +Reads upstream frames one at a time: + +```go +type result struct { + BytesIn int64 + BytesOut int64 + RowsByStmt []int64 // len == count of CommandComplete frames + AffectedByStmt []int64 + LatencyMs int64 + ErrorCode string // empty when no upstream ErrorResponse +} +``` + +Frame handling: + +| Upstream frame | Action | +|---|---| +| `*RowDescription` | forward; row counter for current stmt resets to 0 | +| `*DataRow` | forward; increment current stmt's row counter; add body length to `BytesOut` | +| `*CommandComplete` | parse `CommandTag` for affected count (see below); push current stmt's row count to `RowsByStmt`; push affected to `AffectedByStmt`; advance current-stmt index; forward | +| `*ErrorResponse` | forward; capture `SQLState` into `result.ErrorCode`; remaining stmts (if known) get null counters and `error_code: STATEMENT_ABORTED_BY_PRIOR_ERROR` at event-builder time | +| `*ReadyForQuery` | update `pc.state.lastUpstreamRFQ = m.TxStatus`; forward; flush; `result.LatencyMs = (now - sentAt).ms`; return | +| `*NoticeResponse` / `*ParameterStatus` / `*NotificationResponse` | forward verbatim; do not affect counters | +| other (`*ParameterDescription`, etc.) | forward verbatim | + +`CommandComplete.CommandTag` parsing: + +- `INSERT <oid> <n>` → affected = n +- `UPDATE <n>`, `DELETE <n>`, `MOVE <n>`, `FETCH <n>`, `COPY <n>` → affected = n +- `SELECT <n>` → affected = nil; rows = n (already counted via DataRow) +- everything else (`CREATE TABLE`, `BEGIN`, `COMMIT`, `SET`, etc.) → both nil + +### Per-statement counter attribution + +The *i*-th `CommandComplete` belongs to the *i*-th `ClassifiedStatement`. `DataRow` frames between `CommandComplete[i-1]` and `CommandComplete[i]` belong to stmt *i*. `latency_ms` / `bytes_in` / `bytes_out` are batch-level metrics; we attribute the **same** value to every per-stmt event in the batch (documented in event-builder godoc).
When counts don't line up (fewer `CommandComplete` frames than statements — happens when upstream `ErrorResponse` aborts mid-batch), the remaining stmts get null `rows_returned` / `rows_affected` and `error_code: STATEMENT_ABORTED_BY_PRIOR_ERROR`. + +## 6. Deny synthesis (`deny.go`) + +`synthErrorAndRFQ(sqlstate, message)` writes: + +``` +ErrorResponse{Severity:"ERROR", SQLState: sqlstate, Message: message} +ReadyForQuery{TxStatus:'I'} +``` + +both flushed before returning. Used when `lastUpstreamRFQ ∈ {0, 'I'}`. + +`synthErrorOnly(sqlstate, message)` writes the `ErrorResponse` only; caller closes both conns. Used when `lastUpstreamRFQ ∈ {'T', 'E'}`. + +`pickDenySynth(decisions)` returns `(rendered, sqlstate)`: + +- Iterates decisions in order; first denying entry wins (most-restrictive is deterministic per §10.2 with stable rule order). +- `sqlstate` (on-wire `ErrorResponse.SQLState`): `28000` for connection-rule deny (matches 04b₂'s pattern); `42501` for statement-rule deny (PG-standard "insufficient privilege"); `42501` also for the approve→deny stub case (`decisions[i].Approval != nil`). +- `rendered`: from `decisions[i].DenyMessage` (Plan 02 template) if present; else `"denied by AgentSH policy: "` (or `"denied by AgentSH policy: "` for implicit-deny entries with empty RuleName). + +The synth function only owns the on-wire side. The corresponding `EventResult.ErrorCode` set by the event builder is: +- `APPROVE_NOT_YET_SUPPORTED` for approve→deny stubs. +- `DENIED_BY_SIBLING` for non-denying statements in a batch where another statement denied (see §8). +- empty for the actual denying statement (its `decision.verb=deny` carries the signal). + +`pickDenySynth` never returns nil; deterministic for tests. + +## 7. DBEvent schema (`internal/db/events/event.go`) + +```go +type DBEvent struct { + // existing fields unchanged... 
+ + TLS EventTLS `json:"tls"` + Decision EventDecision `json:"decision"` + Result EventResult `json:"result"` + TxContext EventTxContext `json:"tx_context"` + Predicates EventPredicates `json:"predicates,omitempty"` +} + +type EventTLS struct { + Mode string `json:"mode"` // passthrough|terminate_reissue|terminate_plaintext_upstream + ClientSNI string `json:"client_sni,omitempty"` + UpstreamCertSubject string `json:"upstream_cert_subject,omitempty"` // empty in 04c +} + +type EventDecision struct { + Verb string `json:"verb"` // allow|deny|approve|audit (approve never emitted live in 04c) + RuleKind string `json:"rule_kind"` // statement|connection|cancel + RuleName string `json:"rule_name,omitempty"` + MatchingEffectIndex int `json:"matching_effect_index"` + MatchingEffectGroup string `json:"matching_effect_group,omitempty"` + Reason string `json:"reason,omitempty"` + ContributingAuditRules []string `json:"contributing_audit_rules,omitempty"` +} + +type EventResult struct { + RowsReturned *int64 `json:"rows_returned"` + RowsAffected *int64 `json:"rows_affected"` + BytesIn int64 `json:"bytes_in"` + BytesOut int64 `json:"bytes_out"` + LatencyMs int64 `json:"latency_ms"` + ErrorCode string `json:"error_code,omitempty"` +} + +type EventTxContext struct { + InTransaction bool `json:"in_transaction"` + TxStartedAt time.Time `json:"tx_started_at,omitempty"` // zero in 04c; Plan 05 + DenyAction string `json:"deny_action"` // none|rollback_injected|connection_terminated +} + +type EventPredicates struct { + HasFilter bool `json:"has_filter"` +} +``` + +`RowsReturned` / `RowsAffected` are `*int64` because the spec wire form needs `null` for "not applicable": a `SELECT` event gets `RowsReturned` and leaves `RowsAffected` null; an `INSERT` does the inverse; `CREATE TABLE` leaves both null. + +## 8. Redaction and digest (`eventbuilder.go`) + +`buildStatementEvent(stmt, decision, sql, result, denyAction, tier, conn)` is pure and unit-testable. 
Render table: + +| `tier` | `StatementText` | `StatementDigest` | +|---|---|---| +| `RedactFull` | `sql[stmt.SourceStart:stmt.SourceEnd]` (verbatim per-stmt slice) | `sha256:` + hex(SHA-256(Normalize(slice))) | +| `RedactParametersRedacted` (default) | `Normalize(slice)` | same as above | +| `RedactNone` | omitted (empty) | same as above | + +When `stmt.SourceStart == stmt.SourceEnd == 0` (parser couldn't supply span — `unknown` statement, or fallback's degenerate case), `slice = sql` and a `parser_backend_caveat: "no_span"` tag is set on the event's `ParserBackend` field (free-form per Plan 01's schema). + +`Normalize` errors degrade to the verbatim trimmed SQL; the digest is still populated. + +### Multi-statement deny tagging (`denied_by_sibling`) + +When `anyDeny` is true and statement *i* is not itself denying, its emitted event carries: + +- `decision.verb = "deny"` (most-restrictive batch outcome, per §10.2). +- `decision.rule_name = ""`, `decision.rule_kind = "statement"`, `decision.reason = "denied by sibling statement"`. +- `result.error_code = "DENIED_BY_SIBLING"`. +- `result.rows_returned = nil`, `result.rows_affected = nil`, `result.bytes_in = len(q.String)`, `result.bytes_out = 0`, `result.latency_ms = 0`. + +The denying statement(s) emit a normal `verb=deny` event with the actual `rule_name` / `reason` from `decisions[i]` and empty `result.error_code`. + +### `command_id` for multi-statement batches + +`command_id = ":"` for each per-stmt event. Operators correlate batch members on this prefix. Open question for v0.9 spec discussion: whether `batch_id` deserves its own field. + +## 9. Hot-swap and classifier wiring + +### `Server.SetPolicy` + +```go +type Server struct { + // existing fields unchanged... 
+ policyPtr atomic.Pointer[policy.RuleSet] +} + +func (s *Server) SetPolicy(rs *policy.RuleSet) { s.policyPtr.Store(rs) } +func (s *Server) policy() *policy.RuleSet { return s.policyPtr.Load() } +``` + +`New(cfg)` calls `s.policyPtr.Store(cfg.Policy)` before returning. `nil` policy is legal and means "implicit deny everywhere" (matches Plan 02's `Evaluate(stmt, nil, _)` contract). + +### `classifierFor` + +```go +func (s *Server) classifierFor(dialect string) postgres.Parser { + if s.cfg.classifierForTest != nil { + return s.cfg.classifierForTest(dialect) + } + p, ok := s.classifiers[dialect] + if !ok { return s.classifiers["postgres"] } // shouldn't happen — New validated + return p +} +``` + +`s.classifiers` is set once in `New()`; no locking needed because it's read-only after construction. + +## 10. Testing strategy + +### Unit tests (table-driven, per file) + +| File | Cases | +|---|---| +| `simplequery_test.go` | Single allow; single deny pre-tx; single deny in-tx (`'T'` and `'E'`); multi-stmt all-allow; multi-stmt anyDeny → none-forwarded + N events with denied_by_sibling tagging; approve → APPROVE_NOT_YET_SUPPORTED synth + event verb=deny; frame > MaxQueryBytes → 54000 + close + FRAME_TOO_LARGE lifecycle; non-Q/non-X → 0A000 + close + EXTENDED_QUERY_NOT_SUPPORTED; classifier returns `unknown` → strict-coverage deny via policy.Evaluate. Classifier injected via `classifierForTest`. | +| `upstreamread_test.go` | Per-frame demux: DataRow counting; CommandComplete tag parsing (`INSERT 0 5`, `UPDATE 3`, `SELECT 7`, `DELETE 0`, `MOVE 0`, `CREATE TABLE` → null); per-stmt split via CommandComplete boundaries; latency_ms monotonic; bytes_in/out; ErrorResponse mid-batch → remaining stmts get `STATEMENT_ABORTED_BY_PRIOR_ERROR`; lastUpstreamRFQ updates from various 'Z' status bytes. 
| +| `deny_test.go` | RFQ-byte gating: `{0, 'I'}` → local synth + loop continues; `{'T', 'E'}` → ErrorResponse only + terminate; BEGIN-then-deny sequence; SQLSTATE selection (28000 for conn rule; 42501 for stmt rule and APPROVE_NOT_YET_SUPPORTED); deny_message template substitution. | +| `eventbuilder_test.go` | Redaction tiers: full → verbatim per-stmt slice; parameters_redacted → Normalize; none → empty StatementText. Digest stability: identical across tiers for same stmt. Multi-stmt: distinct EventID per stmt; command_id = `:`. Predicates.HasFilter mirrored from ClassifiedStatement. EventTLS.Mode mirrors svc.TLSMode. | +| `classifiers_test.go` | New() builds Parser keyed by dialect; same dialect shares instance; unknown dialect → New() error; classifierForTest overrides. | +| `parser_normalize_test.go` (in classify/postgres) | Normalize on representative SQL: literal scrubbing, identifier preservation, multi-stmt, error path. Linux+CGO and pure-Go variants both tested via the existing build-tag split. Curated-subset parity test. | + +### Spine integration test (`spine_test.go`) + +One test, three parallel subtests, real `pgx` + real `*Server` + fake upstream: + +- **Allow**: `SELECT 1` → fake upstream replies with `RowDescription` + `DataRow{[1]}` + `CommandComplete("SELECT 1")` + `ReadyForQuery('I')`. Client receives row 1; SyncSink has one `db_statement` event with `Verb=allow`, `RowsReturned=1`, `RowsAffected=nil`, `ErrorCode=""`. +- **Pre-tx deny**: `DELETE FROM t` → policy denies. Client receives `ErrorResponse(42501)` + `ReadyForQuery('I')`. Fake upstream asserts zero post-Startup bytes received. SyncSink has one `db_statement` event with `Verb=deny`. +- **In-tx deny terminate**: `BEGIN` (allowed) → fake upstream `'Z'` with `T`; then `DELETE FROM t` → policy denies. Client receives `ErrorResponse(42501)` and conn closes. SyncSink has BEGIN event `Verb=allow` and DELETE event `Verb=deny` with `tx_context.deny_action="connection_terminated"`. 
+ +`pgx` added to `go.mod` as a test-only dep (imported only from `_test.go`). `go mod tidy` clean after. + +### Cross-compile + +`GOOS=windows go build ./...` stays green via existing `stub_other.go`. New files all `//go:build linux`. `events.DBEvent` schema extension lives in a build-tag-free file so non-Linux callers compile. + +### Test isolation + +`t.TempDir()` for StateDir; per-subtest `*Server`; explicit `t.Cleanup` to drain goroutines; tight deadlines on Read/Write in spine test to avoid net.Pipe flake (see prior `internal/store` Windows-timer flake pattern in memory). + +## 11. Open questions and risks + +### Open questions + +1. **`command_id` shape for multi-stmt batches.** Adopting `command_id = ":"` in 04c. Spec §8 doesn't pin a format. Whether to add a real `batch_id` field is a v0.9 spec discussion, not a 04c blocker. +2. **Upstream `ErrorResponse` SQLSTATE prefix.** When upstream errors on an allowed stmt, we record `result.error_code = "<SQLSTATE>"` (raw, no `UPSTREAM_` prefix). Documented for operator consumption. +3. **`STATEMENT_ABORTED_BY_PRIOR_ERROR`** is 04c-coined for "previous stmt errored mid-batch, this one never executed." Free-form per spec §8. +4. **Pure-Go `Normalize` divergence.** Digest stability is within an implementation, not across. Documented; cross-host normalization unification is a Plan 03 problem. +5. **Plan 03 `ClassifiedStatement.SourceStart/SourceEnd` surface bump** is owned by 04c; accepted scope creep. + +### Risks + +- **Async LISTEN/NOTIFY pushes outside Q…Z round-trips** are not delivered in 04c. Documented as a known limitation; Plan 05's two-goroutine state machine fixes it. Chief regression risk for chatty notification consumers. +- **`Normalize` cross-backend digest divergence** is operator-confusing on multi-host deployments. +- **Spine test flakiness via `net.Pipe`.** Mitigation: tight deadlines, explicit `t.Cleanup`, `-race` runs in CI. +- **DBEvent schema bump** is a wire-shape change.
Safe in 04c — no external consumers of events yet. +- **`MaxQueryBytes` enforcement after pgproto3 body allocation** is mitigation-grade rather than a hard ceiling. Documented; a future plan can swap to a streaming framer if memory griefing becomes real. + +### Deferred (covered earlier; restated for completeness) + +- **To Plan 05.** Extended Query (Parse/Bind/Describe/Execute/Sync/Flush/Close), SQL-level prepared cache, COPY data-frame handling, FunctionCall semantics, full §14 deny modes including `rollback_then_continue`, `tx_started_at`, `approve` runtime, GSSENC opt-in, async LISTEN/NOTIFY delivery. +- **To Plan 06.** BackendKeyData mapping, cancel governance via mapping lookup. +- **To Plan 07.** SO_PEERCRED → SessionID, out-of-process proxy, unavoidability bundle, real-PG integration suite, bypass-tool detection, recommendation flip to `enforce`. + +## 12. Rollout + +Ships behind `policies.db.unavoidability: off` (default) — no listener bound, no behavior change. `observe` mode is the recommended first flip: listeners bind, queries are intercepted, `db_statement` events emit. `enforce` is **not recommended** in 04c — the unavoidability bundle (network/file rules preventing direct egress) is still Plan 07. + +Plan release notes call out four known-limitation items: +- `approve` → `deny + APPROVE_NOT_YET_SUPPORTED`. +- `CancelRequest` forwarded un-mapped (broken-by-design until Plan 06). +- LISTEN/NOTIFY async pushes delivered only inside Q…Z round-trips. +- `client_identity` is `uid:` not SessionID (Plan 07). + +## 13. Done definition + +Plan 04c is done when: + +1. `internal/db/proxy/postgres` builds on Linux and stubs cleanly elsewhere; `GOOS=windows go build ./...` is green. +2. All unit tests above pass, including the spine integration test with real `pgx` + fake upstream covering allow / pre-tx deny / in-tx deny terminate. +3. 
A YAML config with `policies.db.unavoidability: observe`, one `db_services` entry, and a sample policy with both `allow` and `deny` rules produces: + - A `pgx` client connecting through the Unix socket runs `SELECT 1` successfully. + - The same client running a denied `DELETE` receives `ErrorResponse(42501)` + `ReadyForQuery('I')` without any upstream traffic. + - The audit sink contains one `db_statement` event per statement with `verb`, `effects`, `statement_digest`, `statement_redaction`, `decision`, `result`, `tx_context`, `predicates`, and `tls` sub-structs populated. +4. A YAML config with `policies.db.unavoidability: off` remains a no-op: no listener bound, no events emitted, no behavior change. +5. `go test ./...` is green on Linux; `go mod tidy` clean after `pgx` test-only dep addition. diff --git a/docs/superpowers/specs/2026-05-11-docker-sandboxes-mixin-kit-design.md b/docs/superpowers/specs/2026-05-11-docker-sandboxes-mixin-kit-design.md new file mode 100644 index 000000000..e93b1c37a --- /dev/null +++ b/docs/superpowers/specs/2026-05-11-docker-sandboxes-mixin-kit-design.md @@ -0,0 +1,197 @@ +# Docker Sandboxes mixin kit for AgentSH — design + +**Date:** 2026-05-11 +**Status:** Draft, awaiting review +**Owner:** Eran Sandler + +## 1. Goal + +Ship a Docker Sandboxes "mixin kit" that installs AgentSH into any sandbox at creation and routes the agent's command-level activity through a coding-agent-tuned policy. Invoked as: + +``` +sbx run --kit git+https://github.com/erans/agentsh.git#dir=docker/sbx-kit +``` + +It must work on stock `claude` (Claude Code), `opencode`, and `gemini` agent kits with no manual setup beyond the `--kit` flag. + +## 2. Background: Docker Sandboxes mixin kits + +A mixin kit is a `spec.yaml` (+ optional `files/` tree) with `kind: mixin`. It layers onto an existing agent kit and exposes three lifecycle hooks: + +- **`install`** — runs once during sandbox creation, defaults to root. 
+- **`initFiles`** — runtime-written files with `${WORKDIR}` substitution. +- **`startup`** — runs at every sandbox start, non-interactive, can background. Dispatches *before* the agent entrypoint attaches. + +Mixins **cannot** override the agent kit's `entrypoint`. That constraint shapes the design. + +References: +- +- +- + +## 3. Non-goals (v1) + +- Full kernel-level enforcement (seccomp user_notif, ptrace, fanotify, LSM). Those tiers are deferred; the bootstrap has a flag for them but they are off. +- LD_PRELOAD interception. Deferred — needs a new shim library that AgentSH doesn't ship today. Forward-compatible tier label preserved. +- OCI registry publishing. Deferred; git URL is sufficient. +- Listing in `docker/sbx-kits-contrib`. Submit after v1 is stable. +- Windows / WSL2 sandbox support. Docker Sandboxes are Linux containers. + +## 4. Enforcement model + +**Tiered, auto-detect, fail-open.** The bootstrap probes capabilities at startup and lights up the strongest tier the sandbox allows. v1 ships exactly one tier (shims); the bootstrap and tier file are designed so future tiers (LD_PRELOAD, ptrace) can be added without changing the kit's external contract. + +| Tier | What it covers | Capability dependency | v1 status | +|------|----------------|-----------------------|-----------| +| `shim` | Agent subprocess execs (every command the agent shells out to) | None | **enabled** | +| `shim+ldpreload` | In-process libc calls in libc-linked agents (Node/Python) | None (env-injection survival) | parked | +| `shim+ptrace` | All syscalls of the agent's tree | CAP_SYS_PTRACE + `yama.ptrace_scope ≤ 1` | parked | + +The active tier is written to `/run/agentsh/tier` (one of: `none`, `shim`, `shim+ldpreload`, `shim+ptrace`). All other code paths read it from there. + +**Rationale.** Shims always work — no capability, kernel-version, or env-survival dependency — and AgentSH already ships `agentsh-shell-shim`. 
The dominant threat surface in Docker Sandbox coding agents is the agent shelling out (`pip install`, `curl | bash`, `rm`, `git clone`); shims cover that. LD_PRELOAD adds coverage for in-process I/O but is its own engineering project. Post-hoc ptrace is brittle (ptrace_scope rules, signal stacking — see `project_seccomp_user_notif_stacking.md`) and not worth its complexity at v1. + +## 5. Kit layout + +``` +docker/sbx-kit/ +├── spec.yaml # mixin manifest +├── README.md # human-facing usage docs +├── tests/ # validation script + expected outputs +│ └── coding-agent-smoke.sh +└── files/ + ├── workspace/ + │ └── .claude/ + │ └── skills/ + │ └── agentsh/ + │ └── SKILL.md # teaches Claude Code to extend the policy + └── home/ + └── agent/ + └── .agentsh/ + └── policy.yaml # empty stub; user-override location +``` + +## 6. spec.yaml + +```yaml +schemaVersion: "1" +kind: mixin +name: agentsh +displayName: AgentSH +description: Policy-enforced execution gateway for AI coding agents + +commands: + install: + - command: "/bin/sh -c 'curl -fsSL https://github.com/erans/agentsh/releases/latest/download/install.sh | sh'" + user: "0" + description: Install agentsh release artifact + + initFiles: + - path: /etc/profile.d/agentsh.sh + content: 'export PATH=/usr/lib/agentsh/shims:$PATH' + mode: "0644" + + - path: /etc/environment.d/10-agentsh.conf + content: 'PATH=/usr/lib/agentsh/shims:/usr/local/bin:/usr/bin:/bin' + mode: "0644" + + startup: + - command: ["/usr/bin/agentsh-sbx-bootstrap"] + user: "0" + background: true + description: Merge policy, start agentsh server, probe enforcement tiers +``` + +The baked coding-agent policy template (§8) ships with the OS package at `/usr/share/agentsh/coding-agent.template.yaml` rather than via `initFiles`, so it benefits from the package's versioning and integrity checks. 
The bootstrap binary merges that template with the optional user-override fragment at `/home/agent/.agentsh/policy.yaml` and writes the result to `/etc/agentsh/policies/default.yaml` at every startup. `agentsh server` reads its server config from `/etc/agentsh/config.yaml` (installed by the OS package) and resolves the named `default` policy from `/etc/agentsh/policies/`. + +Network/credential blocks intentionally **omitted**. The Docker Sandbox proxy already handles outbound `allowedDomains` and credential injection. AgentSH adds value at the *path* and *command* layer inside the sandbox. + +## 7. Install & startup flow + +**At `sbx run` (install, once):** + +1. `install` command curls `https://github.com/erans/agentsh/releases/latest/download/install.sh` and pipes to `sh`. The script detects the sandbox's package manager (`dpkg`/`rpm`/`apk`) and installs the matching artifact from the same release. Binaries land at `/usr/bin/agentsh*` (including `/usr/bin/agentsh-sbx-bootstrap`); shim symlinks at `/usr/lib/agentsh/shims/`; the coding-agent policy template at `/usr/share/agentsh/coding-agent.template.yaml`; reference docs at `/usr/share/doc/agentsh/`. +2. `initFiles` sets PATH precedence via `/etc/profile.d/agentsh.sh` **and** `/etc/environment.d/10-agentsh.conf` (belt + suspenders for non-login shells). The user-override stub at `/home/agent/.agentsh/policy.yaml` ships in the kit's `files/` tree. +3. The `files/` tree drops the SKILL.md into `/workspace/.claude/skills/agentsh/`. + +**At every sandbox start (startup):** + +`agentsh-sbx-bootstrap` runs sequentially: + +1. **Merge policy.** Read the baked template at `/usr/share/agentsh/coding-agent.template.yaml`. If `/home/agent/.agentsh/policy.yaml` exists and parses, merge it on top — user wins on rule-name collisions, otherwise concatenate in declared order. Write the merged result to `/etc/agentsh/policies/default.yaml` (atomic write via tmp file + rename). 
On any merge or parse error, log loudly and fall back to writing the bare template — never leave the file in an inconsistent state. +2. **Spawn the daemon.** `agentsh server --config /etc/agentsh/config.yaml`, backgrounded; logs to `/var/log/agentsh/daemon.log`. The server config is the one installed by the package and points `policies.dir` at `/etc/agentsh/policies/` with `default` as the active policy name. +3. **Wait up to 2s for the daemon's socket** at the location declared in the server config. If it never appears, fail-open: write `none` to `/run/agentsh/tier`, log a banner to `/var/log/agentsh/bootstrap.log`, exit non-zero so the failure appears in startup output. +4. **Probe tier 1 (shim).** Spawn `/bin/sh -c 'command -v curl'` in a fresh child and verify the resolved path is under `/usr/lib/agentsh/shims/`. Record `tier=shim` on success. +5. **Tier 2 / tier 3 probes** are stubbed out in v1. +6. **Write `/run/agentsh/tier`** with the active tier name. + +**Failure semantics:** fail-open with loud logging. We never brick a user's sandbox. Degradation is visible via the tier file and bootstrap log; the agent's SKILL.md teaches it to read both. + +## 8. Default policy (`/etc/agentsh/policies/default.yaml`) + +Tuned around what coding agents actually do. Adds path/command granularity inside the sandbox; does **not** duplicate the Docker Sandbox proxy's network controls. + +**File rules:** +- `/workspace/**` — full read/write. Soft-delete on `rm`/`rmdir` so a runaway `rm -rf` is recoverable from `/var/lib/agentsh/trash/`. +- `/home/agent/**` — allow read/write, **deny** `~/.ssh/**`, `~/.aws/**`, `~/.gnupg/**`, `~/.kube/**`, `~/.docker/config.json`, `~/.netrc`, `~/.config/gcloud/**`, `~/.config/{gh,git-credentials}`. (Self-protection against credential exfiltration if these leaked into the sandbox image.) +- `/etc/agentsh/**`, `/opt/agentsh/**`, `/run/agentsh/**`, `/var/lib/agentsh/**`, `/var/log/agentsh/**` — **deny write**.
The agent cannot edit its own policy or tamper with logs. +- System paths (`/usr/**`, `/lib/**`, `/lib64/**`, `/bin/**`, `/sbin/**`, `/etc/hosts`, `/etc/resolv.conf`, `/etc/ssl/**`, `/etc/ca-certificates/**`, `/etc/localtime`) — read-only allow. +- Package manager caches (`~/.npm/**`, `~/.cache/pip/**`, `~/.cargo/**`, `~/.cache/go-build/**`, `~/.rustup/**`, `~/.gradle/caches/**`, `~/.m2/**`) — full allow. + +**Command rules:** +- `curl`/`wget` invocations that pipe to a shell — **audit** (allow with audit event). v1 ships audit-only because a dedicated `agentsh-fetch` redirect target does not exist yet; v1.1 can swap audit for redirect once that binary lands. +- `sudo`, `su` — **deny**. The sandbox already pins the agent to a fixed user; escalation is suspicious. +- `chmod 777`, `chmod -R` rooted at `/` or `/home` — **approve**. +- Package installers (`pip install`, `npm install`, `cargo install`, `apt-get install`) — **allow + audit**. Routine for coding work. + +**Signal rules:** +- Allow signals within the agent's own subprocess tree. +- Deny signals targeting `agentsh*` processes or PID 1. + +**Resource limits, approvals, MCP rules, HTTP services, DB rules:** off by default. Advanced surface; user opts in via override. + +## 9. Self-teaching docs + +**Primary: `files/workspace/.claude/skills/agentsh/SKILL.md`** + +Lives under the standard Claude Code skill path (the convention used by the official kit examples). Claude Code auto-discovers it. The SKILL is descriptive: it tells the agent which files to read (`/run/agentsh/tier`, `/etc/agentsh/policies/default.yaml`, `/home/agent/.agentsh/policy.yaml`), shows the shape of a rule, and points to the full reference at `/usr/share/doc/agentsh/policy-reference.md`. To extend, the agent writes YAML to `/home/agent/.agentsh/policy.yaml` and restarts the sandbox (the bootstrap re-runs the merge on next start). + +**Secondary: `docker/sbx-kit/README.md`** — human-facing. 
Covers invocation, verification (`sbx exec cat /run/agentsh/tier`), audit-event viewing, daemon log tailing, and a one-line OpenCode/Gemini setup step (copy/symlink the SKILL into the agent's discovery path; in v1 we don't try to clobber `AGENTS.md` or other workspace-root files declaratively). + +**Override mechanism the SKILL.md depends on:** `agentsh-sbx-bootstrap` merges `/home/agent/.agentsh/policy.yaml` over `/usr/share/agentsh/coding-agent.template.yaml` on every startup and writes the result to `/etc/agentsh/policies/default.yaml`. Precedence: user wins on rule-name collisions; otherwise rules are concatenated in declared order. The merge is implemented in `internal/policy/merge.go` (new helper); no changes to the existing policy loader are needed. + +## 10. Prerequisites (must land before v1 ships) + +1. **`install.sh` at a stable release URL.** New artifact published by the existing release workflow. The script detects distro and installs the matching `.deb`/`.rpm`/`.apk`. Must be reachable at `https://github.com/erans/agentsh/releases/latest/download/install.sh`. +2. **`/usr/lib/agentsh/shims/` directory in the OS packages.** Short list to start: `bash`, `sh`, `curl`, `wget`, `pip`, `pip3`, `npm`, `node`, `git`, `python`, `python3`, `rm`. Symlinks to `/usr/bin/agentsh-shell-shim`, installed via `nfpms.contents` in `.goreleaser.yml`. +3. **`cmd/agentsh-sbx-bootstrap/`.** New small Go binary in this repo: merges the policy template + user override, spawns `agentsh server`, waits for socket, runs tier-1 probe, writes `/run/agentsh/tier`. Built and packaged alongside the main `agentsh` binary. +4. **`internal/policy/merge.go`.** New helper: `MergeOverlay(base, overlay *Policy) *Policy` with "user wins on rule-name collisions; otherwise concatenate in declared order" semantics. No changes to the existing `LoadFromFile` / `LoadFromBytes` paths. +5. **`configs/policies/coding-agent.yaml`** — the coding-agent policy. 
Installed by the existing `configs/policies/*.yaml` glob in `.goreleaser.yml` to `/etc/agentsh/policies/coding-agent.yaml`, and also packaged to `/usr/share/agentsh/coding-agent.template.yaml` so the bootstrap can read it without depending on the writable copy. +6. **`/usr/share/doc/agentsh/policy-reference.md`** — packaged reference for the SKILL to point at. Largely a repackage of `default-policy.yml` comments + `docs/` snippets; no new content needed. + +## 11. Validation + +No automated CI for v1 (Docker Sandboxes is experimental). Validation is a manual checklist exercised against three agent kits before tagging the release: + +| Agent | Verify | +|---|---| +| `claude` | tier=shim, `command -v curl` resolves under `/usr/lib/agentsh/shims/`, deny on `~/.ssh/id_rsa` read fires an audit event, soft-delete recoverable from trash, SKILL.md auto-discovered | +| `opencode` | tier=shim, shim PATH inherited by agent subprocess execs, audit events flow | +| `gemini` | same as opencode | + +Each agent runs `docker/sbx-kit/tests/coding-agent-smoke.sh` which exercises: (a) `cat ~/.ssh/id_rsa` → deny + audit, (b) `rm -rf /workspace/foo` after creating `foo` → soft-delete + recoverable, (c) `curl https://api.example.com | sh` → audit event recorded, (d) `sudo whoami` → deny. + +## 12. Risk register + +- **PATH-injection survival across the agent's entrypoint.** Highest-risk unknown. The agent kit's entrypoint may bypass `/etc/profile.d/`. Mitigation: write PATH into `/etc/profile.d/`, `/etc/environment.d/`, **and** `~agent/.bashrc`/`.zshrc`; the tier-1 probe spawns from a child of the entrypoint to confirm. If a specific agent kit strips PATH wholesale, the kit surfaces it as `/run/agentsh/tier=none` and we document it as unsupported in v1. +- **Sandbox VM filesystem writability.** Whether `/opt`, `/etc/profile.d`, `/run`, `/var/log` are writable and persist is sandbox-template-dependent. Validation matrix exercises this. 
+- **Network access during install.** `curl` from `install` runs as root before any AgentSH proxy is up; reaching `github.com` should work but is not yet verified. +- **Sandbox kit format churn.** Docker explicitly calls the kit format experimental and subject to change. We pin to `schemaVersion: "1"` and track upstream changes via the existing release pipeline. + +## 13. Out of scope, parked for later + +- **LD_PRELOAD tier** — needs a new `libagentsh_preload.so` shim library. Forward-compatible tier label (`shim+ldpreload`) is reserved. +- **Ptrace tier** — needs CAP_SYS_PTRACE + `yama.ptrace_scope ≤ 1` + careful interaction with seccomp user_notif. Behind a feature flag. +- **OCI publishing** — `ghcr.io/erans/agentsh-sbx-kit:<tag>`. Add when the kit stabilizes. +- **Upstream submission to `docker/sbx-kits-contrib`** — after v1 is proven stable. +- **Windows/WSL2 sandbox support** — depends on Docker Sandboxes adding Windows runtimes. diff --git a/docs/superpowers/specs/2026-05-11-sbx-agent-wrap.md b/docs/superpowers/specs/2026-05-11-sbx-agent-wrap.md new file mode 100644 index 000000000..30599ab8a --- /dev/null +++ b/docs/superpowers/specs/2026-05-11-sbx-agent-wrap.md @@ -0,0 +1,236 @@ +# Auto-wrapping the agent harness via `agentsh wrap` — design + +**Date:** 2026-05-11 +**Status:** Revised after real-agent-kit probe revealed PATH precedence design flaw +**Owner:** Eran Sandler +**Parent spec:** `2026-05-11-docker-sandboxes-mixin-kit-design.md` + +## 0. Revision history + +- **v1 (original):** PATH precedence — drop wrapper at `/usr/local/bin/`, rely on it preceding the real binary in PATH. +- **v2 (this version, after probing `docker/sandbox-templates:{opencode,codex,gemini}`):** real agent kits install their binaries via npm at `/usr/local/share/npm-global/bin/`, which **precedes** `/usr/local/bin` in PATH. The v1 wrapper would never have fired against any real agent kit. Switched to **move-aside-and-replace** at the discovered binary location. + +## 1. 
Goal + +After the AgentSH mixin kit installs, the sandbox's agent binary (`claude`, `opencode`, `gemini`, `codex`, `cursor`) is launched as a child of `agentsh wrap`. The harness — not just its tools — runs under AgentSH's full exec-pipeline interception (session, policy, audit, report). + +## 2. Constraint reminder + +A `kind: mixin` kit cannot override the agent kit's entrypoint. The mixin can only modify the filesystem post-install. To intercept the agent's exec without entrypoint cooperation, the wrapper must occupy *the exact path the agent kit's entrypoint already resolves to* — not a path that happens to come earlier in PATH. + +The probe results (`docker run --rm docker/sandbox-templates:opencode bash -lc 'type -a opencode'`) show: + +``` +opencode is /usr/local/share/npm-global/bin/opencode +PATH=/home/agent/.local/bin:/usr/local/share/npm-global/bin:/usr/local/sbin:/usr/local/bin:... +``` + +`/usr/local/bin` is 4th in PATH; npm-global is 2nd. A wrapper at `/usr/local/bin/opencode` would be shadowed. The move-aside design avoids this entirely by relocating the agent binary in place. + +## 3. Non-goals + +- **No env-var opt-in.** Auto-wrap is the kit's purpose. +- **No SKILL.md manual-wrap guidance.** Harness is already wrapped; the LLM doesn't need to invoke wrap explicitly. +- **No "do not touch the agent kit's installed files" guarantee.** The move-aside approach explicitly renames the binary the agent kit shipped. This is the trade-off for actually engaging the wrap. + +## 4. Failure posture: fail-CLOSED (unchanged from v1) + +| Failure | Disposition | +|---|---| +| `${0}.real` missing | exit 127 + stderr — installer didn't run or someone deleted the moved-aside binary | +| `agentsh` binary missing | exit 1 + stderr | +| `/run/agentsh/tier` ≠ `shim` | exit 1 + stderr | +| `agentsh wrap` itself fails after exec | agent launch fails | + +## 5. 
Components (unchanged from v1) + +``` +packaging/ + agent-wrap.sh # the wrapper script (real path now derived from ${0}.real) + agent-wrap_test.sh # shell test of the wrapper + install-agent-wrappers.sh # discover-via-command-v + move-aside + symlink + install-agent-wrappers_test.sh # shell test of the installer + +docker/sbx-kit/ + spec.yaml # (unchanged) second install command + tests/run-e2e.sh # gains a real-agent-kit check using opencode + +.goreleaser.yml # (unchanged) packages the two scripts +``` + +## 6. The wrapper (`/usr/lib/agentsh/agent-wrap`) + +```sh +#!/bin/sh +# Invoked via symlinks placed by install-agent-wrappers.sh at the original +# location of each agent binary (e.g. /usr/local/share/npm-global/bin/opencode). +# Real binary lives at the same path with a .real suffix. +# +# Fail-CLOSED: any health-check failure refuses launch. + +set -u + +# Gated test hook (parallel of v1 design). +if [ "${AGENTSH_TEST:-}" = "1" ]; then + FAKE_ROOT="${FAKE_ROOT:-}" +else + FAKE_ROOT="" +fi + +real="${0}.real" +tier_file="${FAKE_ROOT}/run/agentsh/tier" + +if [ ! -x "$real" ]; then + echo "agentsh-agent-wrap: real binary not found at $real; refusing to launch $(basename "$0")" >&2 + exit 127 +fi + +# command -v also reports shell functions; exec below dispatches to binaries +# only, so a function-named agentsh fails non-zero — still fail-closed. +if ! command -v agentsh >/dev/null 2>&1; then + echo "agentsh-agent-wrap: agentsh binary missing; refusing to launch $(basename "$0") without enforcement" >&2 + exit 1 +fi + +tier=$(cat "$tier_file" 2>/dev/null || echo missing) +if [ "$tier" != "shim" ]; then + echo "agentsh-agent-wrap: enforcement not active (tier='$tier'); refusing to launch $(basename "$0")" >&2 + exit 1 +fi + +exec agentsh wrap -- "$real" "$@" +``` + +Key change from v1: `real="${0}.real"` instead of `real="${FAKE_ROOT}/usr/bin/$name"`. The wrapper is now location-flexible — it works wherever the installer placed it. + +## 7. 
The installer (`/usr/lib/agentsh/install-agent-wrappers.sh`) + +Discovers each known agent via `command -v`, renames it to `.real`, drops a symlink to the wrap script at the original location. + +```sh +#!/bin/sh +# Discover known agent binaries via `command -v`, move them aside to a +# `.real` sibling, and put a symlink to /usr/lib/agentsh/agent-wrap in +# the original location. The agent kit's entrypoint, which resolves the +# agent via PATH lookup, then hits our symlink wherever the binary lived. +# +# Idempotent. Fail-open if the wrap script itself is missing. + +set -eu + +if [ "${AGENTSH_TEST:-}" = "1" ]; then + FAKE_ROOT="${FAKE_ROOT:-}" + PATH="${FAKE_TEST_PATH:-$PATH}" # test can override the search path +else + FAKE_ROOT="" +fi + +WRAP="${FAKE_ROOT}/usr/lib/agentsh/agent-wrap" + +# Known agent binaries. Extend as Docker Sandboxes adds support. +AGENTS="claude opencode gemini codex cursor" + +if [ ! -x "$WRAP" ]; then + echo "install-agent-wrappers: agent-wrap missing at $WRAP; skipping (kit still works without auto-wrap)" >&2 + exit 0 +fi + +for agent in $AGENTS; do + # Discover via PATH lookup, matching how the agent kit's entrypoint + # would resolve the binary. + real=$(command -v "$agent" 2>/dev/null || true) + if [ -z "$real" ] || [ ! -x "$real" ]; then + continue # agent not installed + fi + + # Idempotency: if $real is already our symlink AND $real.real exists, + # the agent is already wrapped — silent skip. + if [ -L "$real" ] && [ "$(readlink "$real")" = "$WRAP" ] && [ -e "${real}.real" ]; then + continue + fi + + # Conflict detection: if $real.real already exists but $real is NOT + # our symlink, something else owns the location — don't touch it. + if [ -e "${real}.real" ]; then + echo "install-agent-wrappers: ${real}.real already exists but $real is not our symlink; not overwriting" >&2 + continue + fi + + # Move-aside-and-replace. 
+ mv "$real" "${real}.real" + ln -s "$WRAP" "$real" + echo "install-agent-wrappers: wrapped $agent at $real (real moved to ${real}.real)" >&2 +done +``` + +**Why `command -v` instead of probing `/usr/bin/` like v1?** Because real agent kits don't put their binaries in `/usr/bin`. `command -v` is the same PATH-search the agent kit's entrypoint uses, so we find the binary wherever it actually lives. This is the load-bearing fix. + +## 8. Kit integration (unchanged from v1) + +Same two install commands in `spec.yaml`: + +```yaml +commands: + install: + - command: "/bin/sh -c 'curl -fsSL https://github.com/erans/agentsh/releases/latest/download/install.sh | sh'" + user: "0" + description: Install agentsh from the latest GitHub release + - command: "/usr/lib/agentsh/install-agent-wrappers.sh" + user: "0" + description: Move-aside-and-replace detected agent binaries with /usr/lib/agentsh/agent-wrap +``` + +## 9. Testing + +**Wrapper script** (5 cases in `packaging/agent-wrap_test.sh`): +1. `${0}.real` missing → exit 127. +2. `agentsh` missing from PATH → exit 1. +3. Tier file says `none` → exit 1. +4. Tier file missing entirely → exit 1. +5. All three green → exec'd `agentsh wrap -- ${0}.real <args>` with args preserved. + +Test setup: place a fake binary at `<path>.real`, place a symlink to the wrapper at `<path>`, invoke via the symlink. Tests no longer reference `/usr/bin`. + +**Installer** (6 cases in `packaging/install-agent-wrappers_test.sh`): +1. No agents on PATH → no actions taken. +2. One agent on PATH → moved to `<path>.real` + symlink created at original location. +3. Multiple agents → all wrapped at their respective discovered locations. +4. Pre-existing `<path>.real` but `$path` is NOT our symlink → skipped with warning (foreign conflict). +5. Missing wrap script → exit 0 with warning, no moves. +6. Idempotent: re-run on already-wrapped tree → silent, no double-rename. + +Test setup uses `AGENTSH_TEST=1 FAKE_TEST_PATH=<dir>` to scope the `command -v` search to a controlled location. 
+ +**E2E** (`docker/sbx-kit/tests/run-e2e.sh`): the existing check 8 (stub-based engagement check) is **replaced** with a real-agent-kit check: + +- Pull `docker/sandbox-templates:opencode` (publicly available; verified). +- Build the real `agentsh` binary on the host (CGO + libseccomp, same as `go build` in this worktree). +- Side-load the binaries, the wrap script, and the installer into the opencode container. +- Run the installer; verify `/usr/local/share/npm-global/bin/opencode` is now a symlink to `/usr/lib/agentsh/agent-wrap` and `opencode.real` exists. +- Bring up the daemon (or stub the wrap session-creation endpoint, per E2E-tractability). +- Invoke `opencode --help` from a login shell as the agent user. Assert `agentsh wrap` engaged AND opencode's real `--help` output appears (proving the move-aside binary still executes correctly). + +This is the real-agent E2E the v1 design never produced — the bug that prompted this revision would have been caught at this step. + +## 10. Documentation + +**`docker/sbx-kit/README.md`** "Behavior" section needs revision: +- Drop "PATH precedence" language. +- Describe the move-aside-and-replace mechanism. +- Update the "Known limitations" — remove the absolute-path-entrypoint caveat (move-aside doesn't depend on PATH order) but ADD: "the installer renames files the agent kit shipped; uninstalling cleanly requires restoring `<path>.real` → `<path>`." + +**`docs/policy-reference.md`** table additions are mostly fine but the `/usr/local/bin/<agent>` row needs to become `<discovered-path>` since the location varies per agent kit. + +## 11. Risk register (revised) + +- **Move-aside collides with agent kit upgrades.** If the agent kit's image is rebuilt with a new agent binary at the same path, our move-aside would have already happened and the new binary would land *next to* our symlink, not under it. v1 mitigation: idempotency check detects "already wrapped" and skips. 
v2 mitigation: image rebuild is image-rebuild — a fresh sandbox triggers a fresh install which catches the new binary. +- **Uninstall path is non-trivial.** Removing the kit no longer just removes a symlink — it needs to restore `<path>.real` → `<path>`. Out of scope for v1; document the manual recovery in the README. +- **Real `agentsh wrap` compatibility with opencode/codex/gemini.** Still untested against real agents until E2E lands. The new E2E directly exercises this. +- **CGO+libseccomp build complexity for the E2E.** The dev host has the deps; CI runners typically have `libseccomp-dev` available via apt. If the E2E build is too brittle, fall back to building agentsh in a Docker build stage. + +## 12. Out of scope (unchanged) + +- LD_PRELOAD / ptrace tiers. +- Wrapping subprocess binaries (already handled by the shim list). +- Clean uninstall logic. +- Detecting agent-kit image upgrades that swap the wrapped binary. diff --git a/internal/db/classify/postgres/backend_test.go b/internal/db/classify/postgres/backend_test.go index be7ad75ff..b34fa434d 100644 --- a/internal/db/classify/postgres/backend_test.go +++ b/internal/db/classify/postgres/backend_test.go @@ -43,3 +43,63 @@ func TestBackend_EmptyInputReturnsEmpty(t *testing.T) { t.Fatalf("empty SQL should produce no statements, got %d", len(got)) } } + +func TestParser_SourceSpan_Single(t *testing.T) { + p := New(DialectPostgres) + sql := "SELECT 1" + got, err := p.Classify(sql, SessionState{}, Options{}) + if err != nil { + t.Fatalf("Classify: %v", err) + } + if len(got) != 1 { + t.Fatalf("len=%d want 1", len(got)) + } + if got[0].SourceStart != 0 { + t.Fatalf("SourceStart=%d want 0", got[0].SourceStart) + } + if got[0].SourceEnd != int32(len(sql)) { + t.Fatalf("SourceEnd=%d want %d", got[0].SourceEnd, len(sql)) + } +} + +func TestParser_SourceSpan_MultiStmt(t *testing.T) { + p := New(DialectPostgres) + sql := "SELECT 1; SELECT 2" + got, err := p.Classify(sql, SessionState{}, Options{}) + if err != nil { + 
t.Fatalf("Classify: %v", err) + } + if len(got) != 2 { + t.Fatalf("len=%d want 2", len(got)) + } + if string(sql[got[0].SourceStart:got[0].SourceEnd]) != "SELECT 1" { + t.Fatalf("stmt[0] span = %q want %q", + string(sql[got[0].SourceStart:got[0].SourceEnd]), "SELECT 1") + } + if string(sql[got[1].SourceStart:got[1].SourceEnd]) != "SELECT 2" { + t.Fatalf("stmt[1] span = %q want %q", + string(sql[got[1].SourceStart:got[1].SourceEnd]), "SELECT 2") + } +} + +func TestParser_SourceSpan_TrailingStmtNoSemicolon(t *testing.T) { + // Single statement with no trailing semicolon — libpg_query reports + // StmtLen=0 for trailing single statements; classifyWithBackend must + // extend SourceEnd to len(sql). + p := New(DialectPostgres) + sql := "SELECT 1" + got, err := p.Classify(sql, SessionState{}, Options{}) + if err != nil { + t.Fatalf("Classify: %v", err) + } + if len(got) != 1 { + t.Fatalf("len=%d want 1", len(got)) + } + if got[0].SourceEnd != int32(len(sql)) { + t.Fatalf("SourceEnd=%d want %d (StmtLen=0 must extend to end)", + got[0].SourceEnd, len(sql)) + } + if got[0].SourceStart != 0 { + t.Fatalf("SourceStart=%d want 0", got[0].SourceStart) + } +} diff --git a/internal/db/classify/postgres/normalize_linux.go b/internal/db/classify/postgres/normalize_linux.go new file mode 100644 index 000000000..5816461d4 --- /dev/null +++ b/internal/db/classify/postgres/normalize_linux.go @@ -0,0 +1,9 @@ +//go:build linux && cgo + +package postgres + +import pg_query "github.com/pganalyze/pg_query_go/v6" + +func (p *cgoParser) Normalize(sql string) (string, error) { + return pg_query.Normalize(sql) +} diff --git a/internal/db/classify/postgres/normalize_other.go b/internal/db/classify/postgres/normalize_other.go new file mode 100644 index 000000000..4c36607cd --- /dev/null +++ b/internal/db/classify/postgres/normalize_other.go @@ -0,0 +1,9 @@ +//go:build !linux || !cgo + +package postgres + +import pgquery_wasm "github.com/wasilibs/go-pgquery" + +func (p *wasmParser) Normalize(sql 
string) (string, error) { + return pgquery_wasm.Normalize(sql) +} diff --git a/internal/db/classify/postgres/parser.go b/internal/db/classify/postgres/parser.go index 947e8edc8..d8470045b 100644 --- a/internal/db/classify/postgres/parser.go +++ b/internal/db/classify/postgres/parser.go @@ -7,6 +7,8 @@ package postgres import ( "fmt" "strings" + "unicode" + "unicode/utf8" pg_query "github.com/pganalyze/pg_query_go/v6" @@ -101,6 +103,10 @@ func (s SessionState) Clone() SessionState { // Parser is the single public surface. Implementations are returned by New. type Parser interface { Classify(sql string, sess SessionState, opts Options) ([]effects.ClassifiedStatement, error) + // Normalize returns SQL with all literal values replaced by $N placeholders. + // On parse failure returns the parser error verbatim; callers degrade to + // the verbatim trimmed SQL for digest computation. + Normalize(sql string) (string, error) } // New returns the parser for the given dialect, using whichever libpg_query @@ -160,6 +166,29 @@ func classifyWithBackend( out := make([]effects.ClassifiedStatement, 0, len(res.Stmts)) for _, raw := range res.Stmts { cs := classifyRawStmt(dialect, raw, sess, opts, backend) + // pg_query gives StmtLen=0 for a trailing single statement; in that + // case the statement runs from StmtLocation to end-of-input. + start := raw.StmtLocation + length := raw.StmtLen + var end int32 + if length == 0 { + end = int32(len(sql)) + } else { + end = start + length + } + // Skip leading whitespace to get the actual statement boundaries. + // libpg_query's StmtLocation can point at a separator's trailing + // whitespace in multi-statement input. Use utf8.DecodeRuneInString + // so multi-byte whitespace (e.g. U+00A0) is handled correctly. 
+ for start < end { + r, width := utf8.DecodeRuneInString(sql[int(start):]) + if !unicode.IsSpace(r) { + break + } + start += int32(width) + } + cs.SourceStart = start + cs.SourceEnd = end out = append(out, cs) } return out, nil diff --git a/internal/db/classify/postgres/parser_normalize_test.go b/internal/db/classify/postgres/parser_normalize_test.go new file mode 100644 index 000000000..e72c71b70 --- /dev/null +++ b/internal/db/classify/postgres/parser_normalize_test.go @@ -0,0 +1,50 @@ +package postgres + +import ( + "strings" + "testing" +) + +func TestParser_Normalize_Literals(t *testing.T) { + p := New(DialectPostgres) + cases := []struct { + name string + in string + want string + }{ + {"int literal", "SELECT 1", "SELECT $1"}, + {"string literal", "SELECT 'hello'", "SELECT $1"}, + {"two literals", "SELECT 1, 'x'", "SELECT $1, $2"}, + {"identifier preserved", "SELECT a FROM t", "SELECT a FROM t"}, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got, err := p.Normalize(tc.in) + if err != nil { + t.Fatalf("Normalize(%q): %v", tc.in, err) + } + if got != tc.want { + t.Fatalf("Normalize(%q) = %q want %q", tc.in, got, tc.want) + } + }) + } +} + +func TestParser_Normalize_MultiStatement(t *testing.T) { + p := New(DialectPostgres) + got, err := p.Normalize("SELECT 1; SELECT 'x'") + if err != nil { + t.Fatalf("Normalize: %v", err) + } + if !strings.Contains(got, "$1") || !strings.Contains(got, "$2") { + t.Fatalf("Normalize did not redact both literals: %q", got) + } +} + +func TestParser_Normalize_Error(t *testing.T) { + p := New(DialectPostgres) + _, err := p.Normalize("THIS IS NOT SQL ;;;") + if err == nil { + t.Fatalf("Normalize on malformed SQL: want err, got nil") + } +} diff --git a/internal/db/effects/statement.go b/internal/db/effects/statement.go index fec547d8d..33f10a50f 100644 --- a/internal/db/effects/statement.go +++ b/internal/db/effects/statement.go @@ -29,6 +29,12 @@ type ClassifiedStatement struct { RawVerb string 
`json:"raw_verb,omitempty"` ParserBackend ParserBackend `json:"parser_backend,omitempty"` Error string `json:"error,omitempty"` + + // SourceStart / SourceEnd are byte offsets into the original SQL input + // (Plan 04c needs these to slice per-stmt text under RedactionFull). Both + // zero when the parser cannot supply them (e.g. unknown-statement path). + SourceStart int32 `json:"source_start,omitempty"` + SourceEnd int32 `json:"source_end,omitempty"` } // Primary returns the first (canonical) effect. ok=false on empty effects list. diff --git a/internal/db/effects/statement_test.go b/internal/db/effects/statement_test.go index e7962737d..36d9b84d4 100644 --- a/internal/db/effects/statement_test.go +++ b/internal/db/effects/statement_test.go @@ -4,6 +4,7 @@ package effects import ( "bytes" "encoding/json" + "strings" "testing" ) @@ -90,3 +91,38 @@ func TestClassifiedStatement_ErrorField(t *testing.T) { } }) } + +func TestClassifiedStatement_SourceSpan_RoundTrip(t *testing.T) { + in := ClassifiedStatement{ + Effects: []Effect{{Group: GroupRead, Resolution: ResolutionQualified}}, + RawVerb: "SELECT", + SourceStart: 7, + SourceEnd: 23, + } + bs, err := json.Marshal(in) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + var out ClassifiedStatement + if err := json.Unmarshal(bs, &out); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if out.SourceStart != in.SourceStart || out.SourceEnd != in.SourceEnd { + t.Fatalf("span lost: got (%d,%d) want (%d,%d)", + out.SourceStart, out.SourceEnd, in.SourceStart, in.SourceEnd) + } +} + +func TestClassifiedStatement_SourceSpan_ZeroOmitted(t *testing.T) { + in := ClassifiedStatement{ + Effects: []Effect{{Group: GroupRead, Resolution: ResolutionQualified}}, + RawVerb: "SELECT", + } + bs, err := json.Marshal(in) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if strings.Contains(string(bs), "source_start") || strings.Contains(string(bs), "source_end") { + t.Fatalf("zero span fields must be omitted: %s", bs) + } +} diff 
--git a/internal/db/events/event.go b/internal/db/events/event.go index 819f71cd9..8cba1b9e4 100644 --- a/internal/db/events/event.go +++ b/internal/db/events/event.go @@ -25,15 +25,67 @@ type DBEvent struct { Effects []effects.Effect `json:"effects"` - OperationGroup string `json:"operation_group,omitempty"` - OperationGroupID uint8 `json:"operation_group_id,omitempty"` - OperationSubtype string `json:"operation_subtype,omitempty"` - RawVerb string `json:"raw_verb,omitempty"` - ObjectResolution string `json:"object_resolution,omitempty"` + OperationGroup string `json:"operation_group,omitempty"` + OperationGroupID uint8 `json:"operation_group_id,omitempty"` + OperationSubtype string `json:"operation_subtype,omitempty"` + RawVerb string `json:"raw_verb,omitempty"` + ObjectResolution string `json:"object_resolution,omitempty"` StatementDigest string `json:"statement_digest,omitempty"` StatementText string `json:"statement_text,omitempty"` StatementRedaction Redaction `json:"statement_redaction"` ParserBackend effects.ParserBackend `json:"parser_backend,omitempty"` + + TLS EventTLS `json:"tls"` + Decision EventDecision `json:"decision"` + Result EventResult `json:"result"` + TxContext EventTxContext `json:"tx_context"` + Predicates EventPredicates `json:"predicates,omitempty"` +} + +// EventTLS mirrors spec §8 tls{}. UpstreamCertSubject is unpopulated in 04c. +type EventTLS struct { + Mode string `json:"mode"` + ClientSNI string `json:"client_sni,omitempty"` + UpstreamCertSubject string `json:"upstream_cert_subject,omitempty"` +} + +// EventDecision mirrors spec §8 decision{}. Verb is one of "allow"|"deny"| +// "approve"|"audit" (approve never emitted live in 04c; the runtime stubs it +// out as deny + APPROVE_NOT_YET_SUPPORTED). 
+type EventDecision struct { + Verb string `json:"verb"` + RuleKind string `json:"rule_kind"` + RuleName string `json:"rule_name,omitempty"` + MatchingEffectIndex int `json:"matching_effect_index"` + MatchingEffectGroup string `json:"matching_effect_group,omitempty"` + Reason string `json:"reason,omitempty"` + ContributingAuditRules []string `json:"contributing_audit_rules,omitempty"` +} + +// EventResult mirrors spec §8 result{}. RowsReturned / RowsAffected are +// pointers so JSON wire form carries null for "not applicable". +type EventResult struct { + RowsReturned *int64 `json:"rows_returned"` + RowsAffected *int64 `json:"rows_affected"` + BytesIn int64 `json:"bytes_in"` + BytesOut int64 `json:"bytes_out"` + LatencyMs int64 `json:"latency_ms"` + ErrorCode string `json:"error_code,omitempty"` +} + +// EventTxContext mirrors spec §8 tx_context{}. TxStartedAt is zero-valued +// in 04c; Plan 05's state machine populates it. DenyAction is one of +// "none"|"connection_terminated"|"rollback_injected" (last value Plan 05). +type EventTxContext struct { + InTransaction bool `json:"in_transaction"` + TxStartedAt time.Time `json:"tx_started_at,omitempty"` + DenyAction string `json:"deny_action"` } + +// EventPredicates mirrors spec §8 predicates{}. 
+type EventPredicates struct { + HasFilter bool `json:"has_filter"` +} + diff --git a/internal/db/events/event_test.go b/internal/db/events/event_test.go index cae2f2d6b..b8ad3d632 100644 --- a/internal/db/events/event_test.go +++ b/internal/db/events/event_test.go @@ -3,6 +3,7 @@ package events import ( "encoding/json" + "strings" "testing" "time" @@ -69,3 +70,67 @@ func TestDBEvent_JSONRoundTrip(t *testing.T) { t.Errorf("redaction lost: %v", out.StatementRedaction) } } + +func TestDBEvent_Extended_RoundTrip(t *testing.T) { + rows := int64(7) + in := DBEvent{ + EventID: "01HJ...", + SessionID: "sess-1", + Timestamp: time.Date(2026, 5, 10, 12, 0, 0, 0, time.UTC), + DBService: "appdb", + DBFamily: "postgres", + DBDialect: "postgres", + Effects: []effects.Effect{{Group: effects.GroupRead, Resolution: effects.ResolutionQualified}}, + + TLS: EventTLS{Mode: "terminate_reissue", ClientSNI: "db.example"}, + Decision: EventDecision{ + Verb: "allow", + RuleKind: "statement", + RuleName: "app-allow-read", + MatchingEffectIndex: 0, + MatchingEffectGroup: "read", + }, + Result: EventResult{ + RowsReturned: &rows, + BytesIn: 9, + BytesOut: 42, + LatencyMs: 3, + }, + TxContext: EventTxContext{InTransaction: false, DenyAction: "none"}, + Predicates: EventPredicates{HasFilter: true}, + } + bs, err := json.Marshal(in) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + var out DBEvent + if err := json.Unmarshal(bs, &out); err != nil { + t.Fatalf("Unmarshal: %v", err) + } + if out.Decision.Verb != "allow" || out.Result.LatencyMs != 3 { + t.Fatalf("round-trip mismatch: %+v", out) + } + if out.Result.RowsReturned == nil || *out.Result.RowsReturned != 7 { + t.Fatalf("rows_returned lost: %+v", out.Result.RowsReturned) + } + if out.Result.RowsAffected != nil { + t.Fatalf("rows_affected must be nil for null in wire form: %+v", + out.Result.RowsAffected) + } +} + +func TestDBEvent_Extended_RowsNull(t *testing.T) { + in := DBEvent{ + EventID: "01HJ...", + Timestamp: 
time.Now().UTC().Truncate(time.Second), + Result: EventResult{BytesIn: 9, BytesOut: 0, LatencyMs: 0}, + TxContext: EventTxContext{DenyAction: "none"}, + } + bs, err := json.Marshal(in) + if err != nil { + t.Fatalf("Marshal: %v", err) + } + if !strings.Contains(string(bs), `"rows_returned":null`) { + t.Fatalf("rows_returned must serialise as null when nil; got %s", bs) + } +} diff --git a/internal/db/proxy/postgres/classifiers.go b/internal/db/proxy/postgres/classifiers.go new file mode 100644 index 000000000..b517bfaaf --- /dev/null +++ b/internal/db/proxy/postgres/classifiers.go @@ -0,0 +1,42 @@ +//go:build linux + +package postgres + +import ( + "fmt" + + classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres" +) + +// buildClassifierMap constructs one Parser per distinct dialect across the +// supplied services. Services sharing a dialect share a Parser instance. +// Returns an error when any service's Dialect is not a recognized name. +func buildClassifierMap(svcs []Service) (map[string]classify_pg.Parser, error) { + out := make(map[string]classify_pg.Parser, 4) + for _, svc := range svcs { + if _, ok := out[svc.Dialect]; ok { + continue + } + d, ok := classify_pg.ParseDialect(svc.Dialect) + if !ok { + return nil, fmt.Errorf("postgres.New: services[%q].Dialect = %q is not a recognized dialect", + svc.Name, svc.Dialect) + } + out[svc.Dialect] = classify_pg.New(d) + } + return out, nil +} + +// classifierFor returns the parser registered for the given dialect. Falls +// back to the "postgres" parser if a lookup fails — buildClassifierMap +// validated dialects at New(), so this should not happen in practice. +// classifierForTest, when set on Config, overrides the map entirely. 
+func (s *Server) classifierFor(dialect string) classify_pg.Parser { + if s.cfg.classifierForTest != nil { + return s.cfg.classifierForTest(dialect) + } + if p, ok := s.classifiers[dialect]; ok { + return p + } + return s.classifiers["postgres"] +} diff --git a/internal/db/proxy/postgres/classifiers_test.go b/internal/db/proxy/postgres/classifiers_test.go new file mode 100644 index 000000000..aa91a9d67 --- /dev/null +++ b/internal/db/proxy/postgres/classifiers_test.go @@ -0,0 +1,53 @@ +//go:build linux + +package postgres + +import ( + "path/filepath" + "testing" + + classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres" +) + +func TestBuildClassifierMap_PerDialect(t *testing.T) { + svcs := []Service{ + {Name: "a", Family: "postgres", Dialect: "postgres", Listen: ServiceListener{Kind: "unix", Path: filepath.Join(t.TempDir(), "a")}}, + {Name: "b", Family: "postgres", Dialect: "postgres", Listen: ServiceListener{Kind: "unix", Path: filepath.Join(t.TempDir(), "b")}}, + {Name: "c", Family: "postgres", Dialect: "cockroachdb", Listen: ServiceListener{Kind: "unix", Path: filepath.Join(t.TempDir(), "c")}}, + } + m, err := buildClassifierMap(svcs) + if err != nil { + t.Fatalf("buildClassifierMap: %v", err) + } + if len(m) != 2 { + t.Fatalf("map size = %d want 2 (postgres, cockroachdb)", len(m)) + } + if m["postgres"] == nil || m["cockroachdb"] == nil { + t.Fatalf("expected entries for both dialects, got %+v", m) + } +} + +func TestBuildClassifierMap_RejectsUnknown(t *testing.T) { + _, err := buildClassifierMap([]Service{ + {Name: "x", Family: "postgres", Dialect: "rabbitql"}, + }) + if err == nil { + t.Fatalf("expected error for unknown dialect, got nil") + } +} + +func TestServer_ClassifierFor_TestHookOverride(t *testing.T) { + calls := 0 + hook := func(dialect string) classify_pg.Parser { + calls++ + return classify_pg.New(classify_pg.DialectPostgres) + } + // Build a Server directly without going through New (avoiding the + // fully-populated config 
requirement for this isolated test). + s := &Server{cfg: Config{classifierForTest: hook}} + _ = s.classifierFor("postgres") + _ = s.classifierFor("anything") + if calls != 2 { + t.Fatalf("hook called %d times, want 2", calls) + } +} diff --git a/internal/db/proxy/postgres/server.go b/internal/db/proxy/postgres/server.go index eb16f06cf..a61dc9b75 100644 --- a/internal/db/proxy/postgres/server.go +++ b/internal/db/proxy/postgres/server.go @@ -21,10 +21,12 @@ import ( "os" "path/filepath" "sync" + "sync/atomic" "golang.org/x/sync/errgroup" "github.com/agentsh/agentsh/internal/db/events" + classify_pg "github.com/agentsh/agentsh/internal/db/classify/postgres" "github.com/agentsh/agentsh/internal/db/policy" "github.com/agentsh/agentsh/internal/db/service" "github.com/agentsh/agentsh/internal/db/tlsleaf" @@ -65,11 +67,19 @@ type Config struct { Logger *slog.Logger Policy *policy.RuleSet // current rule set; nil means "no rules" (implicit deny). Hot-swappable in a later plan. + // MaxQueryBytes caps the 'Q' frame body. Default 1 MiB when zero. + // Statements above the cap get a synthetic ErrorResponse(54000) + close. + MaxQueryBytes int + // UpstreamTLSConfigForTest, when non-nil, overrides the production // upstream-TLS config (system roots, verify-full, MinVersion=TLS12, // ServerName from svc.Upstream). Test-only — production callsites must // leave this nil. dialUpstream uses this verbatim when non-nil. UpstreamTLSConfigForTest *tls.Config + + // classifierForTest, when non-nil, overrides the per-dialect Parser map + // built by New(). Test-only — production callsites must leave this nil. + classifierForTest func(dialect string) classify_pg.Parser } // Server runs the AgentSH PostgreSQL proxy listeners. @@ -99,6 +109,9 @@ type Server struct { caMu sync.Mutex caRef *tlsleaf.CA + + policyPtr atomic.Pointer[policy.RuleSet] + classifiers map[string]classify_pg.Parser } // New validates cfg and returns a *Server. 
When cfg.Unavoidability == @@ -112,8 +125,12 @@ func New(cfg Config) (*Server, error) { return nil, errors.New("postgres.New: StateDir is required") } if cfg.Unavoidability == service.UnavoidabilityOff { + if cfg.MaxQueryBytes == 0 { + cfg.MaxQueryBytes = 1 << 20 + } srv := &Server{cfg: cfg, logger: cfg.Logger, sentinel: true, done: make(chan struct{})} srv.uidAllowed = func(uid uint32) bool { return uid == uint32(os.Getuid()) } + srv.policyPtr.Store(cfg.Policy) return srv, nil } if cfg.Sink == nil { @@ -133,12 +150,22 @@ func New(cfg Config) (*Server, error) { return nil, fmt.Errorf("postgres.New: services[%d].Listen.Path is empty for unix listener", i) } } - return &Server{ - cfg: cfg, - logger: cfg.Logger, - done: make(chan struct{}), - uidAllowed: func(uid uint32) bool { return uid == uint32(os.Getuid()) }, - }, nil + if cfg.MaxQueryBytes == 0 { + cfg.MaxQueryBytes = 1 << 20 + } + classifiers, err := buildClassifierMap(cfg.Services) + if err != nil { + return nil, err + } + srv := &Server{ + cfg: cfg, + logger: cfg.Logger, + done: make(chan struct{}), + uidAllowed: func(uid uint32) bool { return uid == uint32(os.Getuid()) }, + classifiers: classifiers, + } + srv.policyPtr.Store(cfg.Policy) + return srv, nil } // Start binds listeners and runs accept loops until ctx is cancelled. @@ -364,3 +391,9 @@ func (s *Server) ca() (*tlsleaf.CA, error) { "cert", filepath.Join(s.cfg.StateDir, "db-ca.crt")) return ca, nil } + +// SetPolicy atomically replaces the active rule set. A nil ruleset means +// "implicit deny everywhere" (matches policy.Evaluate(stmt, nil, _)). 
+func (s *Server) SetPolicy(rs *policy.RuleSet) { s.policyPtr.Store(rs) } + +func (s *Server) policy() *policy.RuleSet { return s.policyPtr.Load() } diff --git a/internal/db/proxy/postgres/server_test.go b/internal/db/proxy/postgres/server_test.go index 18e68cddb..f5a714b19 100644 --- a/internal/db/proxy/postgres/server_test.go +++ b/internal/db/proxy/postgres/server_test.go @@ -10,6 +10,7 @@ import ( "net" "os" "path/filepath" + "strings" "testing" "time" @@ -164,6 +165,7 @@ func TestServer_StartTwice_ReturnsError(t *testing.T) { Sink: &events.SyncSink{}, Services: []Service{{ Name: "appdb", + Dialect: "postgres", Listen: ServiceListener{Kind: "unix", Path: filepath.Join(t.TempDir(), "appdb.sock")}, Service: policy.DBService{Name: "appdb"}, TLSMode: "terminate_reissue", @@ -240,3 +242,106 @@ func TestServer_LazyCALoad(t *testing.T) { t.Error("ca() did not return cached pointer on second call") } } + +func TestServer_New_AppliesMaxQueryBytesDefault(t *testing.T) { + cfg := Config{ + Unavoidability: service.UnavoidabilityObserve, + StateDir: t.TempDir(), + Sink: &events.SyncSink{}, + Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + Services: []Service{{ + Name: "appdb", + Family: "postgres", + Dialect: "postgres", + Upstream: "127.0.0.1:5432", + TLSMode: "terminate_reissue", + Listen: ServiceListener{Kind: "unix", Path: filepath.Join(t.TempDir(), "appdb.sock")}, + Service: policy.DBService{Name: "appdb", Family: "postgres", Dialect: "postgres", Upstream: "127.0.0.1:5432", TLSMode: "terminate_reissue"}, + }}, + } + s, err := New(cfg) + if err != nil { + t.Fatalf("New: %v", err) + } + if got := s.cfg.MaxQueryBytes; got != 1<<20 { + t.Fatalf("MaxQueryBytes default = %d want %d", got, 1<<20) + } +} + +func TestServer_New_HonorsMaxQueryBytesOverride(t *testing.T) { + cfg := Config{ + Unavoidability: service.UnavoidabilityObserve, + StateDir: t.TempDir(), + Sink: &events.SyncSink{}, + Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + MaxQueryBytes: 
4096, + Services: []Service{{ + Name: "appdb", + Family: "postgres", + Dialect: "postgres", + Upstream: "127.0.0.1:5432", + TLSMode: "terminate_reissue", + Listen: ServiceListener{Kind: "unix", Path: filepath.Join(t.TempDir(), "appdb.sock")}, + Service: policy.DBService{Name: "appdb", Family: "postgres", Dialect: "postgres", Upstream: "127.0.0.1:5432", TLSMode: "terminate_reissue"}, + }}, + } + s, err := New(cfg) + if err != nil { + t.Fatalf("New: %v", err) + } + if got := s.cfg.MaxQueryBytes; got != 4096 { + t.Fatalf("MaxQueryBytes = %d want 4096", got) + } +} + +func TestServer_SetPolicy_AtomicSwap(t *testing.T) { + cfg := Config{ + Unavoidability: service.UnavoidabilityObserve, + StateDir: t.TempDir(), + Sink: &events.SyncSink{}, + Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + Services: []Service{{ + Name: "appdb", + Family: "postgres", + Dialect: "postgres", + Upstream: "127.0.0.1:5432", + TLSMode: "terminate_reissue", + Listen: ServiceListener{Kind: "unix", Path: filepath.Join(t.TempDir(), "appdb.sock")}, + Service: policy.DBService{Name: "appdb", Family: "postgres", Dialect: "postgres", Upstream: "127.0.0.1:5432", TLSMode: "terminate_reissue"}, + }}, + } + s, err := New(cfg) + if err != nil { + t.Fatalf("New: %v", err) + } + if got := s.policy(); got != nil { + t.Fatalf("initial policy = %p want nil", got) + } + rs := &policy.RuleSet{} + s.SetPolicy(rs) + if got := s.policy(); got != rs { + t.Fatalf("policy() after SetPolicy = %p want %p", got, rs) + } +} + +func TestServer_New_RejectsUnknownDialect(t *testing.T) { + cfg := Config{ + Unavoidability: service.UnavoidabilityObserve, + StateDir: t.TempDir(), + Sink: &events.SyncSink{}, + Logger: slog.New(slog.NewTextHandler(testWriter{t}, nil)), + Services: []Service{{ + Name: "appdb", + Family: "postgres", + Dialect: "rabbitql", // unknown + Upstream: "127.0.0.1:5432", + TLSMode: "terminate_reissue", + Listen: ServiceListener{Kind: "unix", Path: filepath.Join(t.TempDir(), "appdb.sock")}, + 
Service: policy.DBService{Name: "appdb", Family: "postgres", Dialect: "rabbitql", Upstream: "127.0.0.1:5432", TLSMode: "terminate_reissue"}, + }}, + } + _, err := New(cfg) + if err == nil || !strings.Contains(err.Error(), "rabbitql") { + t.Fatalf("New on unknown dialect: err = %v", err) + } +} diff --git a/internal/policy/agent_policies_test.go b/internal/policy/agent_policies_test.go index dd34ac2e1..30795682e 100644 --- a/internal/policy/agent_policies_test.go +++ b/internal/policy/agent_policies_test.go @@ -282,6 +282,24 @@ func TestAgentPolicies_ObserveRuleDetails(t *testing.T) { assert.True(t, p.Audit.IncludeStderr) } +func TestAgentPolicies_CodingAgent(t *testing.T) { + root := findProjectRoot(t) + path := filepath.Join(root, "configs", "policies", "coding-agent.yaml") + p, err := LoadFromFile(path) + require.NoError(t, err, "failed to load coding-agent policy") + + assert.Equal(t, 1, p.Version) + assert.Equal(t, "coding-agent", p.Name) + assert.NoError(t, p.Validate()) + + assert.GreaterOrEqual(t, len(p.FileRules), 9, + "file_rules count dropped below floor of 9 (currently %d)", len(p.FileRules)) + assert.GreaterOrEqual(t, len(p.CommandRules), 2, + "command_rules count dropped below floor of 2 (currently %d)", len(p.CommandRules)) + assert.GreaterOrEqual(t, len(p.SignalRules), 3, + "signal_rules count dropped below floor of 3 (currently %d)", len(p.SignalRules)) +} + // loadAgentDefaultEngine loads agent-default.yaml and creates an engine with // variable expansion and enforced approvals. 
func loadAgentDefaultEngine(t *testing.T) *Engine { diff --git a/internal/policy/coding_agent_template_test.go b/internal/policy/coding_agent_template_test.go new file mode 100644 index 000000000..9b7c6e2e8 --- /dev/null +++ b/internal/policy/coding_agent_template_test.go @@ -0,0 +1,61 @@ +package policy + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +// TestCodingAgentTemplate_Loads verifies the policy that the Docker Sandboxes +// mixin kit bakes into /etc/agentsh/policies/default.yaml parses cleanly +// through the canonical loader. Any field-name typo or schema drift will be +// caught here before the kit ships. +func TestCodingAgentTemplate_Loads(t *testing.T) { + path := filepath.Join("..", "..", "configs", "policies", "coding-agent.yaml") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read template: %v", err) + } + p, err := LoadFromBytes(data) + if err != nil { + t.Fatalf("load template: %v", err) + } + if p.Name != "coding-agent" { + t.Errorf("Name = %q, want %q", p.Name, "coding-agent") + } + if len(p.FileRules) == 0 { + t.Error("expected file_rules") + } + if len(p.CommandRules) == 0 { + t.Error("expected command_rules") + } + if len(p.SignalRules) == 0 { + t.Error("expected signal_rules") + } +} + +// TestCodingAgentTemplate_DeniesCredentialPaths spot-checks that the rules from +// the design spec are actually present. Coverage isn't exhaustive; this just +// catches accidental rule deletion during future edits. 
+func TestCodingAgentTemplate_DeniesCredentialPaths(t *testing.T) { + path := filepath.Join("..", "..", "configs", "policies", "coding-agent.yaml") + data, err := os.ReadFile(path) + if err != nil { + t.Fatalf("read template: %v", err) + } + body := string(data) + for _, want := range []string{ + "/.ssh/", + "/.aws/", + "/.gnupg/", + "/.kube/", + "/.netrc", + "/etc/agentsh/", + "/usr/lib/agentsh/", + } { + if !strings.Contains(body, want) { + t.Errorf("expected coding-agent.yaml to reference %q", want) + } + } +} diff --git a/internal/policy/merge.go b/internal/policy/merge.go new file mode 100644 index 000000000..a4b7197ff --- /dev/null +++ b/internal/policy/merge.go @@ -0,0 +1,178 @@ +package policy + +// MergeOverlay returns a new Policy formed by overlaying `overlay` rules on +// top of `base`. Rules with matching names in overlay replace base entries +// in-place at their original index; other overlay rules are appended in +// declared order. Rules with empty Name always append. +// +// Base metadata is preserved via a shallow struct copy: every non-rule +// field on Policy (Version, Name, Description, ResourceLimits, EnvPolicy, +// Audit, EnvInject, MCPRules, ProcessContexts, ProcessIdentities, +// PackageRules, TransparentCommands, Providers, HTTPServices, the +// yaml.Node-backed extension fields, and any other future scalar fields) +// is taken from `base`; overlay metadata is ignored. Because the copy is +// shallow, slices and maps in the returned policy SHARE backing storage +// with `base`. Callers must not mutate the returned policy's non-rule +// fields in place; clone first if needed. +// +// FileRules, NetworkRules, CommandRules, UnixRules, and SignalRules are +// merged by name. DNS-redirect and connect-redirect rules are also merged +// by name. `RegistryRules` (Windows-only), HTTP services, and +// unnamed/opaque blocks remain unmerged: base wins. +// +// If either argument is nil, the other is returned unchanged. 
This lets +// callers handle "no user override" without a nil check at the call site. +// +// Used by cmd/agentsh-sbx-bootstrap to combine the baked coding-agent +// template with /home/agent/.agentsh/policy.yaml at sandbox startup. +func MergeOverlay(base, overlay *Policy) *Policy { + if base == nil { + return overlay + } + if overlay == nil { + return base + } + + out := *base + out.FileRules = mergeFileRules(base.FileRules, overlay.FileRules) + out.NetworkRules = mergeNetworkRules(base.NetworkRules, overlay.NetworkRules) + out.CommandRules = mergeCommandRules(base.CommandRules, overlay.CommandRules) + out.UnixRules = mergeUnixRules(base.UnixRules, overlay.UnixRules) + out.SignalRules = mergeSignalRules(base.SignalRules, overlay.SignalRules) + out.DnsRedirectRules = mergeDnsRedirectRules(base.DnsRedirectRules, overlay.DnsRedirectRules) + out.ConnectRedirectRules = mergeConnectRedirectRules(base.ConnectRedirectRules, overlay.ConnectRedirectRules) + return &out +} + +func mergeFileRules(base, overlay []FileRule) []FileRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]FileRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeNetworkRules(base, overlay []NetworkRule) []NetworkRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]NetworkRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeCommandRules(base, overlay []CommandRule) []CommandRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]CommandRule(nil), base...) 
+ for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeUnixRules(base, overlay []UnixSocketRule) []UnixSocketRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]UnixSocketRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeSignalRules(base, overlay []SignalRule) []SignalRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]SignalRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeDnsRedirectRules(base, overlay []DnsRedirectRule) []DnsRedirectRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]DnsRedirectRule(nil), base...) + for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} + +func mergeConnectRedirectRules(base, overlay []ConnectRedirectRule) []ConnectRedirectRule { + if len(overlay) == 0 { + return base + } + idx := map[string]int{} + for i, r := range base { + idx[r.Name] = i + } + out := append([]ConnectRedirectRule(nil), base...) 
+ for _, r := range overlay { + if i, ok := idx[r.Name]; ok && r.Name != "" { + out[i] = r + continue + } + out = append(out, r) + } + return out +} diff --git a/internal/policy/merge_test.go b/internal/policy/merge_test.go new file mode 100644 index 000000000..4c10b1cda --- /dev/null +++ b/internal/policy/merge_test.go @@ -0,0 +1,126 @@ +package policy + +import ( + "testing" +) + +func TestMergeOverlay_OverlayWinsOnNameCollision(t *testing.T) { + base := &Policy{ + Version: 1, + Name: "base", + FileRules: []FileRule{ + {Name: "rule-a", Decision: "allow", Paths: []string{"/a"}}, + {Name: "rule-b", Decision: "allow", Paths: []string{"/b"}}, + }, + } + overlay := &Policy{ + Version: 1, + Name: "overlay", + FileRules: []FileRule{ + {Name: "rule-b", Decision: "deny", Paths: []string{"/b"}}, + {Name: "rule-c", Decision: "allow", Paths: []string{"/c"}}, + }, + } + + merged := MergeOverlay(base, overlay) + + if got := len(merged.FileRules); got != 3 { + t.Fatalf("len(FileRules) = %d, want 3", got) + } + if merged.FileRules[0].Name != "rule-a" { + t.Errorf("FileRules[0].Name = %q, want rule-a", merged.FileRules[0].Name) + } + if merged.FileRules[1].Name != "rule-b" || merged.FileRules[1].Decision != "deny" { + t.Errorf("FileRules[1] = %+v, want rule-b with decision=deny (overlay wins)", merged.FileRules[1]) + } + if merged.FileRules[2].Name != "rule-c" { + t.Errorf("FileRules[2].Name = %q, want rule-c", merged.FileRules[2].Name) + } +} + +func TestMergeOverlay_NilOverlayReturnsBase(t *testing.T) { + base := &Policy{Version: 1, Name: "base", FileRules: []FileRule{{Name: "x"}}} + merged := MergeOverlay(base, nil) + if merged != base { + t.Errorf("MergeOverlay(base, nil) should return base unchanged") + } +} + +func TestMergeOverlay_NilBaseReturnsOverlay(t *testing.T) { + overlay := &Policy{Version: 1, Name: "overlay", FileRules: []FileRule{{Name: "x"}}} + merged := MergeOverlay(nil, overlay) + if merged != overlay { + t.Errorf("MergeOverlay(nil, overlay) should return 
overlay unchanged") + } +} + +func TestMergeOverlay_PreservesAllRuleKinds(t *testing.T) { + base := &Policy{ + Version: 1, + Name: "base", + FileRules: []FileRule{{Name: "f1"}}, + CommandRules: []CommandRule{{Name: "c1"}}, + SignalRules: []SignalRule{{Name: "s1"}}, + NetworkRules: []NetworkRule{{Name: "n1"}}, + UnixRules: []UnixSocketRule{{Name: "u1"}}, + DnsRedirectRules: []DnsRedirectRule{{Name: "d1"}}, + ConnectRedirectRules: []ConnectRedirectRule{{Name: "cr1"}}, + } + overlay := &Policy{ + Version: 1, + Name: "overlay", + FileRules: []FileRule{{Name: "f2"}}, + CommandRules: []CommandRule{{Name: "c2"}}, + SignalRules: []SignalRule{{Name: "s2"}}, + NetworkRules: []NetworkRule{{Name: "n2"}}, + UnixRules: []UnixSocketRule{{Name: "u2"}}, + DnsRedirectRules: []DnsRedirectRule{{Name: "d2"}}, + ConnectRedirectRules: []ConnectRedirectRule{{Name: "cr2"}}, + } + merged := MergeOverlay(base, overlay) + if len(merged.FileRules) != 2 || len(merged.CommandRules) != 2 || + len(merged.SignalRules) != 2 || len(merged.NetworkRules) != 2 || + len(merged.UnixRules) != 2 || len(merged.DnsRedirectRules) != 2 || + len(merged.ConnectRedirectRules) != 2 { + t.Errorf("merged rule counts wrong: %+v", merged) + } +} + +func TestMergeOverlay_KeepsBaseMetadata(t *testing.T) { + base := &Policy{Version: 1, Name: "base", Description: "from base"} + overlay := &Policy{Version: 1, Name: "overlay"} + merged := MergeOverlay(base, overlay) + if merged.Name != "base" { + t.Errorf("merged.Name = %q, want %q (base metadata preserved)", merged.Name, "base") + } + if merged.Description != "from base" { + t.Errorf("merged.Description = %q, want %q", merged.Description, "from base") + } +} + +func TestMergeOverlay_EmptyNameOverlayAppends(t *testing.T) { + base := &Policy{ + Version: 1, + Name: "base", + FileRules: []FileRule{ + {Name: "rule-a", Decision: "allow"}, + }, + } + overlay := &Policy{ + Version: 1, + Name: "overlay", + FileRules: []FileRule{ + {Name: "", Decision: "deny", Paths: 
[]string{"/anon"}}, // anonymous: must append + }, + } + merged := MergeOverlay(base, overlay) + if len(merged.FileRules) != 2 { + t.Fatalf("len(FileRules) = %d, want 2 (anonymous overlay rule should append)", len(merged.FileRules)) + } + if merged.FileRules[0].Name != "rule-a" { + t.Errorf("FileRules[0] = %+v, want base's rule-a preserved", merged.FileRules[0]) + } + if merged.FileRules[1].Name != "" || merged.FileRules[1].Decision != "deny" { + t.Errorf("FileRules[1] = %+v, want appended anonymous deny rule", merged.FileRules[1]) + } +} diff --git a/packaging/agent-wrap.sh b/packaging/agent-wrap.sh new file mode 100755 index 000000000..4f56b3fe5 --- /dev/null +++ b/packaging/agent-wrap.sh @@ -0,0 +1,49 @@ +#!/bin/sh +# Invoked via symlinks placed by install-agent-wrappers.sh at the original +# location of each agent binary (e.g. /usr/local/share/npm-global/bin/opencode). +# Real binary lives at the same path with a .real suffix (move-aside-and-replace). +# +# Routes the agent through `agentsh wrap`. Fail-CLOSED: any health-check +# failure refuses the launch with a non-zero exit and a stderr message. +# +# This deviates from the parent kit's "never brick the sandbox" posture +# (parent spec §7) because this kit's purpose IS enforcement; running +# unenforced when the operator asked for enforcement is the worse failure. +# +# FAKE_ROOT is a TEST-ONLY hook: when set, /run/agentsh paths are relocated +# under that root. It is ONLY honored when AGENTSH_TEST=1 is also set, so a +# sandboxed process that can manipulate environment variables cannot use +# FAKE_ROOT to redirect path resolution. Production must NOT set FAKE_ROOT or +# AGENTSH_TEST. + +set -u + +# Gated test hook. +if [ "${AGENTSH_TEST:-}" = "1" ]; then + FAKE_ROOT="${FAKE_ROOT:-}" +else + FAKE_ROOT="" +fi + +real="${0}.real" +tier_file="${FAKE_ROOT}/run/agentsh/tier" + +if [ ! 
-x "$real" ]; then + echo "agentsh-agent-wrap: real binary not found at $real; refusing to launch $(basename "$0")" >&2 + exit 127 +fi + +# command -v also reports shell functions; exec below dispatches to binaries +# only, so a function-named agentsh fails non-zero — still fail-closed. +if ! command -v agentsh >/dev/null 2>&1; then + echo "agentsh-agent-wrap: agentsh binary missing; refusing to launch $(basename "$0") without enforcement" >&2 + exit 1 +fi + +tier=$(cat "$tier_file" 2>/dev/null || echo missing) +if [ "$tier" != "shim" ]; then + echo "agentsh-agent-wrap: enforcement not active (tier='$tier'); refusing to launch $(basename "$0")" >&2 + exit 1 +fi + +exec agentsh wrap -- "$real" "$@" diff --git a/packaging/agent-wrap_test.sh b/packaging/agent-wrap_test.sh new file mode 100755 index 000000000..5c549ef2e --- /dev/null +++ b/packaging/agent-wrap_test.sh @@ -0,0 +1,109 @@ +#!/usr/bin/env bash +# Smoke test for packaging/agent-wrap.sh. Sets up an isolated tempdir with a +# fake agent binary at .real, fake agentsh, and fake tier file, then +# drives the wrapper through all 5 scenarios. +# +# Layout matches the move-aside-and-replace design: the wrapper symlink lives +# at $tmp/local-bin/claude and the real binary lives at +# $tmp/local-bin/claude.real (a sibling with .real suffix). + +set -euo pipefail + +here=$(cd "$(dirname "$0")" && pwd) +wrap="$here/agent-wrap.sh" + +if [ ! -x "$wrap" ]; then + echo "FAIL: $wrap missing or not executable" + exit 1 +fi + +# Create an isolated harness. +tmp=$(mktemp -d -t agent-wrap-test.XXXXXX) +trap 'rm -rf "$tmp"' EXIT + +mkdir -p "$tmp/local-bin" "$tmp/agentsh-bin" "$tmp/run/agentsh" "$tmp/empty-bin" + +# Fake real agent binary placed at .real (sibling of the symlink). +cat >"$tmp/local-bin/claude.real" <<'EOF' +#!/bin/sh +echo "REAL-CLAUDE: $*" +EOF +chmod +x "$tmp/local-bin/claude.real" + +# Fake agentsh that announces itself when called as `agentsh wrap`. 
+cat >"$tmp/agentsh-bin/agentsh" <<'EOF' +#!/bin/sh +echo "AGENTSH-WRAP: $*" +EOF +chmod +x "$tmp/agentsh-bin/agentsh" + +# Symlink the wrapper at the original agent location. +ln -s "$wrap" "$tmp/local-bin/claude" + +# Helper: run the symlinked wrapper with FAKE_ROOT for tier-file relocation. +run_wrap() { + AGENTSH_TEST=1 FAKE_ROOT="$tmp" PATH="$tmp/agentsh-bin:$PATH" "$tmp/local-bin/claude" "$@" +} + +run_wrap_no_agentsh() { + # Restrict PATH to an empty dir so `command -v agentsh` fails + # regardless of what is installed on the host system. + AGENTSH_TEST=1 FAKE_ROOT="$tmp" PATH="$tmp/empty-bin" "$tmp/local-bin/claude" "$@" +} + +# Test 1: ${0}.real missing → exit 127 +rm "$tmp/local-bin/claude.real" +out=$(run_wrap --version 2>&1) && rc=0 || rc=$? +if [ "$rc" -ne 127 ]; then + echo "FAIL: missing-real-binary should exit 127; got rc=$rc out=$out" + exit 1 +fi +# Restore for subsequent tests. +cat >"$tmp/local-bin/claude.real" <<'EOF' +#!/bin/sh +echo "REAL-CLAUDE: $*" +EOF +chmod +x "$tmp/local-bin/claude.real" +echo "PASS: missing-real-binary exits 127" + +# Test 2: agentsh missing → exit 1 +echo "shim" >"$tmp/run/agentsh/tier" +out=$(run_wrap_no_agentsh --version 2>&1) && rc=0 || rc=$? +if [ "$rc" -ne 1 ]; then + echo "FAIL: missing-agentsh should exit 1; got rc=$rc out=$out" + exit 1 +fi +echo "PASS: missing-agentsh exits 1" + +# Test 3: tier=none → exit 1 +echo "none" >"$tmp/run/agentsh/tier" +out=$(run_wrap --version 2>&1) && rc=0 || rc=$? +if [ "$rc" -ne 1 ]; then + echo "FAIL: tier=none should exit 1; got rc=$rc out=$out" + exit 1 +fi +echo "PASS: tier=none exits 1" + +# Test 4: tier file missing → exit 1 +rm "$tmp/run/agentsh/tier" +out=$(run_wrap --version 2>&1) && rc=0 || rc=$? 
+if [ "$rc" -ne 1 ]; then
+  echo "FAIL: tier-missing should exit 1; got rc=$rc out=$out"
+  exit 1
+fi
+echo "PASS: tier-missing exits 1"
+
+# Test 5: everything green → exec'd agentsh wrap -- .real
+echo "shim" >"$tmp/run/agentsh/tier"
+out=$(run_wrap --version --foo bar 2>&1)
+expected="AGENTSH-WRAP: wrap -- $tmp/local-bin/claude.real --version --foo bar"
+if [ "$out" != "$expected" ]; then
+  echo "FAIL: engage path wrong"
+  echo " want: $expected"
+  echo " got: $out"
+  exit 1
+fi
+echo "PASS: engages wrap with args"
+
+echo
+echo "OK agent-wrap.sh (5/5)"
diff --git a/packaging/install-agent-wrappers.sh b/packaging/install-agent-wrappers.sh
new file mode 100755
index 000000000..b73808d14
--- /dev/null
+++ b/packaging/install-agent-wrappers.sh
@@ -0,0 +1,57 @@
+#!/bin/sh
+# Discover known agent binaries via `command -v`, move them aside to a
+# `.real` sibling, and put a symlink to /usr/lib/agentsh/agent-wrap in
+# the original location.
+#
+# Idempotent. Fail-open if the wrap script itself is missing.
+#
+# Test hooks (honoured only when AGENTSH_TEST=1):
+#   FAKE_ROOT       prefix prepended to the agent-wrap path
+#   FAKE_TEST_PATH  PATH used for the agent lookup instead of $PATH
+
+set -eu
+
+if [ "${AGENTSH_TEST:-}" = "1" ]; then
+  FAKE_ROOT="${FAKE_ROOT:-}"
+  _AGENT_PATH="${FAKE_TEST_PATH:-$PATH}"
+else
+  FAKE_ROOT=""
+  _AGENT_PATH="$PATH"
+fi
+
+WRAP="${FAKE_ROOT}/usr/lib/agentsh/agent-wrap"
+
+AGENTS="claude opencode gemini codex cursor"
+
+if [ ! -x "$WRAP" ]; then
+  echo "install-agent-wrappers: agent-wrap missing at $WRAP; skipping (kit still works without auto-wrap)" >&2
+  exit 0
+fi
+
+for agent in $AGENTS; do
+  real=$(PATH="$_AGENT_PATH" command -v "$agent" 2>/dev/null || true)
+  if [ -z "$real" ] || [ ! -x "$real" ]; then
+    continue
+  fi
+
+  # Already our symlink: never move it aside. Renaming the symlink would
+  # turn ${real}.real into a link to agent-wrap instead of the real binary.
+  if [ -L "$real" ] && [ "$(readlink "$real")" = "$WRAP" ]; then
+    # Idempotency: fully wrapped → silent skip; warn only when the
+    # .real sibling has gone missing.
+    if [ ! -e "${real}.real" ]; then
+      echo "install-agent-wrappers: $real already points at agent-wrap but ${real}.real is missing; leaving as-is" >&2
+    fi
+    continue
+  fi
+
+  # Conflict: .real exists but $real is not our symlink.
+  if [ -e "${real}.real" ]; then
+    echo "install-agent-wrappers: ${real}.real already exists but $real is not our symlink; not overwriting" >&2
+    continue
+  fi
+
+  mv "$real" "${real}.real"
+  ln -s "$WRAP" "$real"
+  echo "install-agent-wrappers: wrapped $agent at $real (real moved to ${real}.real)" >&2
+done
diff --git a/packaging/install-agent-wrappers_test.sh b/packaging/install-agent-wrappers_test.sh
new file mode 100755
index 000000000..bc5b3af12
--- /dev/null
+++ b/packaging/install-agent-wrappers_test.sh
@@ -0,0 +1,185 @@
+#!/usr/bin/env bash
+# Smoke test for packaging/install-agent-wrappers.sh.
+# Drives the installer through 7 scenarios using AGENTSH_TEST=1 / FAKE_ROOT /
+# FAKE_TEST_PATH (v2 harness — move-aside-and-replace mechanism).
+
+set -euo pipefail
+
+here=$(cd "$(dirname "$0")" && pwd)
+installer="$here/install-agent-wrappers.sh"
+
+if [ ! -x "$installer" ]; then
+  echo "FAIL: $installer missing or not executable"
+  exit 1
+fi
+
+# tmp dir shared across tests; each test resets state inside it.
+tmp=$(mktemp -d -t install-wrappers.XXXXXX)
+trap 'rm -rf "$tmp"' EXIT
+
+WRAP="$tmp/usr/lib/agentsh/agent-wrap"
+
+setup_root() {
+  rm -rf "$tmp"
+  mkdir -p "$tmp/usr/lib/agentsh"
+  mkdir -p "$tmp/searchable-bin"
+  cat >"$WRAP" <<'EOF'
+#!/bin/sh
+exit 0
+EOF
+  chmod +x "$WRAP"
+}
+
+make_agent() {
+  local name="$1"
+  local dir="${2:-$tmp/searchable-bin}"
+  mkdir -p "$dir"
+  printf '#!/bin/sh\nexit 0\n' >"$dir/$name"
+  chmod +x "$dir/$name"
+}
+
+run_installer() {
+  # FAKE_TEST_PATH scopes the agent command -v lookup to our fake bin dir only.
+  # System utilities (mv, ln, echo) use the outer PATH — not affected.
+  AGENTSH_TEST=1 FAKE_ROOT="$tmp" FAKE_TEST_PATH="$tmp/searchable-bin" \
+    "$installer" "$@"
+}
+
+# ---------------------------------------------------------------------------
+# Test 1: No agents on PATH → nothing moved/created, exit 0
+# ---------------------------------------------------------------------------
+setup_root
+run_installer >/dev/null 2>&1
+if [ -n "$(ls -A "$tmp/searchable-bin" 2>/dev/null)" ]; then
+  echo "FAIL test1: searchable-bin should be empty; found: $(ls "$tmp/searchable-bin")"
+  exit 1
+fi
+echo "PASS: 1 — no agents → nothing created"
+
+# ---------------------------------------------------------------------------
+# Test 2: One agent on PATH → claude is now a symlink, claude.real exists
+# ---------------------------------------------------------------------------
+setup_root
+make_agent "claude"
+run_installer >/dev/null 2>&1
+link_target=$(readlink "$tmp/searchable-bin/claude" 2>/dev/null || echo MISSING)
+if [ "$link_target" != "$WRAP" ]; then
+  echo "FAIL test2: claude should be symlink to WRAP; got: $link_target"
+  exit 1
+fi
+if [ ! -x "$tmp/searchable-bin/claude.real" ]; then
+  echo "FAIL test2: claude.real missing or not executable"
+  exit 1
+fi
+echo "PASS: 2 — one agent wrapped (symlink + .real)"
+
+# ---------------------------------------------------------------------------
+# Test 3: Multiple agents on PATH → all wrapped
+# ---------------------------------------------------------------------------
+setup_root
+for a in claude opencode; do
+  make_agent "$a"
+done
+run_installer >/dev/null 2>&1
+for a in claude opencode; do
+  link=$(readlink "$tmp/searchable-bin/$a" 2>/dev/null || echo MISSING)
+  if [ "$link" != "$WRAP" ]; then
+    echo "FAIL test3: $a not wrapped; link=$link"
+    exit 1
+  fi
+  if [ ! -e "$tmp/searchable-bin/$a.real" ]; then
+    echo "FAIL test3: $a.real missing"
+    exit 1
+  fi
+done
+echo "PASS: 3 — multiple agents wrapped"
+
+# ---------------------------------------------------------------------------
+# Test 4: Foreign .real conflict → installer skips, warns, leaves files intact
+# ---------------------------------------------------------------------------
+setup_root
+make_agent "claude"
+# Simulate something else already claiming the .real slot
+echo "foreign" >"$tmp/searchable-bin/claude.real"
+chmod +x "$tmp/searchable-bin/claude.real"
+out=$(run_installer 2>&1) || true
+# claude should still be a regular file, NOT a symlink
+if [ -L "$tmp/searchable-bin/claude" ]; then
+  echo "FAIL test4: claude was overwritten with a symlink despite conflict"
+  exit 1
+fi
+if ! echo "$out" | grep -q "not our symlink"; then
+  echo "FAIL test4: expected 'not our symlink' warning; got: $out"
+  exit 1
+fi
+echo "PASS: 4 — foreign .real conflict → skip with warning"
+
+# ---------------------------------------------------------------------------
+# Test 5: Wrap script missing → exit 0, warning, no agents touched
+# ---------------------------------------------------------------------------
+setup_root
+rm "$WRAP"
+make_agent "claude"
+out=$(run_installer 2>&1) && rc=0 || rc=$?
+if [ "$rc" -ne 0 ]; then
+  echo "FAIL test5: missing-wrap should exit 0; got rc=$rc"
+  exit 1
+fi
+if [ -L "$tmp/searchable-bin/claude" ]; then
+  echo "FAIL test5: claude was symlinked despite missing wrap"
+  exit 1
+fi
+if ! echo "$out" | grep -q "agent-wrap missing"; then
+  echo "FAIL test5: expected 'agent-wrap missing' warning; got: $out"
+  exit 1
+fi
+echo "PASS: 5 — missing wrap exits 0 with warning"
+
+# ---------------------------------------------------------------------------
+# Test 6: Idempotency — already-wrapped state is not double-renamed
+# ---------------------------------------------------------------------------
+setup_root
+# Manually set up wrapped state: claude.real is the original, claude is symlink
+make_agent "claude.real" # the renamed-aside original
+ln -s "$WRAP" "$tmp/searchable-bin/claude"
+out=$(run_installer 2>&1)
+# Should be silent (no warning lines about claude)
+if echo "$out" | grep -q "claude"; then
+  echo "FAIL test6: idempotent re-run produced output about claude: $out"
+  exit 1
+fi
+# claude should still be symlink → WRAP
+link=$(readlink "$tmp/searchable-bin/claude" 2>/dev/null || echo MISSING)
+if [ "$link" != "$WRAP" ]; then
+  echo "FAIL test6: claude no longer points to WRAP after re-run; link=$link"
+  exit 1
+fi
+# claude.real must still exist
+if [ ! -e "$tmp/searchable-bin/claude.real" ]; then
+  echo "FAIL test6: claude.real vanished after re-run"
+  exit 1
+fi
+echo "PASS: 6 — idempotent (silent, no double-rename)"
+
+# ---------------------------------------------------------------------------
+# Test 7: Our symlink present but .real missing → symlink left alone, warning
+# ---------------------------------------------------------------------------
+setup_root
+ln -s "$WRAP" "$tmp/searchable-bin/claude"
+out=$(run_installer 2>&1) && rc=0 || rc=$?
+if [ "$rc" -ne 0 ]; then
+  echo "FAIL test7: wrapper symlink without .real should exit 0; got rc=$rc"
+  exit 1
+fi
+if [ -e "$tmp/searchable-bin/claude.real" ] || [ -L "$tmp/searchable-bin/claude.real" ]; then
+  echo "FAIL test7: wrapper symlink was moved aside to claude.real"
+  exit 1
+fi
+if ! echo "$out" | grep -q "is missing"; then
+  echo "FAIL test7: expected 'is missing' warning; got: $out"
+  exit 1
+fi
+echo "PASS: 7 — wrapper symlink without .real → left alone with warning"
+
+echo
+echo "OK install-agent-wrappers.sh (7/7)"
diff --git a/scripts/install-agentsh.sh b/scripts/install-agentsh.sh
new file mode 100755
index 000000000..5c0fa5596
--- /dev/null
+++ b/scripts/install-agentsh.sh
@@ -0,0 +1,164 @@
+#!/bin/sh
+# install-agentsh.sh — install AgentSH into a Linux container/VM.
+#
+# Used by the Docker Sandboxes mixin kit; also safe to run interactively
+# on any supported Linux. Detects the host's package manager and
+# downloads the matching release artifact from the latest AgentSH
+# GitHub release.
+#
+# Package manager support:
+#   dpkg — downloads the .deb artifact
+#   rpm  — downloads the .rpm artifact
+#   apk  — downloads the .apk artifact (Alpine Linux)
+#
+# Env knobs (all optional):
+#   AGENTSH_VERSION       Pinned release tag, e.g. v0.1.2 (default: latest).
+#                         A leading "v" is kept in the release path and
+#                         dropped from the artifact file name.
+#   AGENTSH_ARCH          amd64 | arm64 (default: detected via uname -m)
+#   AGENTSH_DRY_RUN       1 = print actions without downloading/installing
+#   AGENTSH_FORCE_DETECT  dpkg | rpm | apk | none (test hook)
+#
+# Exit codes:
+#   0 success
+#   1 detection failure (unsupported arch or no supported package manager)
+#   2 download failure
+#   3 install failure
+
+set -eu
+
+GITHUB_REPO="erans/agentsh"
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+# Print the release architecture (amd64 | arm64), or "unsupported".
+# AGENTSH_ARCH, when set, is echoed back verbatim.
+detect_arch() {
+  if [ -n "${AGENTSH_ARCH:-}" ]; then
+    printf '%s' "$AGENTSH_ARCH"
+    return
+  fi
+  case "$(uname -m)" in
+    x86_64|amd64) printf 'amd64' ;;
+    aarch64|arm64) printf 'arm64' ;;
+    *) printf 'unsupported' ;;
+  esac
+}
+
+# Print the first package manager found (dpkg, then rpm, then apk), or
+# "none". AGENTSH_FORCE_DETECT, when set, is echoed back verbatim.
+detect_pm() {
+  if [ -n "${AGENTSH_FORCE_DETECT:-}" ]; then
+    printf '%s' "$AGENTSH_FORCE_DETECT"
+    return
+  fi
+  if command -v dpkg >/dev/null 2>&1; then printf 'dpkg'; return; fi
+  if command -v rpm >/dev/null 2>&1; then printf 'rpm'; return; fi
+  if command -v apk >/dev/null 2>&1; then printf 'apk'; return; fi
+  printf 'none'
+}
+
+# Resolve the version to install. If AGENTSH_VERSION is set, use it
+# verbatim. Otherwise hit the GitHub Releases API and parse the tag from
+# the JSON response using only sed — no jq dependency required.
+resolve_version() {
+  if [ -n "${AGENTSH_VERSION:-}" ]; then
+    printf '%s' "$AGENTSH_VERSION"
+    return
+  fi
+  ver=$(curl -fsSL "https://api.github.com/repos/${GITHUB_REPO}/releases/latest" \
+    | sed -n 's/.*"tag_name":[[:space:]]*"\([^"]*\)".*/\1/p')
+  if [ -z "$ver" ]; then
+    echo "install-agentsh: failed to resolve latest version from GitHub API" >&2
+    exit 2
+  fi
+  printf '%s' "$ver"
+}
+
+# Wrap every side-effecting command so that AGENTSH_DRY_RUN=1 prints
+# rather than executes. Never bypass this wrapper.
+run() {
+  if [ "${AGENTSH_DRY_RUN:-}" = "1" ]; then
+    echo "DRY: $*"
+  else
+    "$@"
+  fi
+}
+
+# ---------------------------------------------------------------------------
+# Main
+# ---------------------------------------------------------------------------
+
+main() {
+  arch=$(detect_arch)
+  if [ "$arch" = "unsupported" ]; then
+    echo "install-agentsh: unsupported architecture $(uname -m)" >&2
+    exit 1
+  fi
+
+  pm=$(detect_pm)
+
+  # Resolve version once — this may make a network call, so skip it in
+  # dry-run mode if AGENTSH_VERSION is not set, to keep tests offline.
+  if [ "${AGENTSH_DRY_RUN:-}" = "1" ] && [ -z "${AGENTSH_VERSION:-}" ]; then
+    ver="VERSION"
+  else
+    ver=$(resolve_version)
+  fi
+
+  # GoReleaser renders artifact names from the version with the tag's
+  # leading "v" stripped, while the release download path uses the tag
+  # itself — so keep $ver for the path and use $pkgver in file names.
+  pkgver=${ver#v}
+
+  base="https://github.com/${GITHUB_REPO}/releases/download/${ver}"
+
+  case "$pm" in
+    dpkg)
+      # GoReleaser nfpms deb default: agentsh_<version>_linux_<arch>.deb
+      fname="agentsh_${pkgver}_linux_${arch}.deb"
+      url="${base}/${fname}"
+      tmp="/tmp/agentsh.deb"
+      echo "install-agentsh: using dpkg (${url})"
+      run curl -fsSL "$url" -o "$tmp" || exit 2
+      run dpkg -i "$tmp" || exit 3
+      ;;
+
+    rpm)
+      # GoReleaser nfpms rpm default: agentsh-<version>-1.<arch>.rpm
+      rpmarch=$([ "$arch" = "amd64" ] && echo x86_64 || echo aarch64)
+      fname="agentsh-${pkgver}-1.${rpmarch}.rpm"
+      url="${base}/${fname}"
+      tmp="/tmp/agentsh.rpm"
+      echo "install-agentsh: using rpm (${url})"
+      run curl -fsSL "$url" -o "$tmp" || exit 2
+      run rpm -Uvh --replacepkgs "$tmp" || exit 3
+      ;;
+
+    apk)
+      # GoReleaser nfpms apk default: agentsh_<version>_linux_<arch>.apk
+      fname="agentsh_${pkgver}_linux_${arch}.apk"
+      url="${base}/${fname}"
+      tmp="/tmp/agentsh.apk"
+      echo "install-agentsh: using apk (${url})"
+      run curl -fsSL "$url" -o "$tmp" || exit 2
+      run apk add --allow-untrusted "$tmp" || exit 3
+      ;;
+
+    none)
+      echo "install-agentsh: no supported package manager (dpkg/rpm/apk) found" >&2
+      exit 1
+      ;;
+
+    *)
+      echo "install-agentsh: unknown package manager '${pm}'" >&2
+      exit 1
+      ;;
+  esac
+
+  echo "install-agentsh: done"
+}
+
+main "$@"
diff --git a/scripts/install-agentsh_test.sh b/scripts/install-agentsh_test.sh
new file mode 100755
index 000000000..2e58a66b9
--- /dev/null
+++ b/scripts/install-agentsh_test.sh
@@ -0,0 +1,62 @@
+#!/usr/bin/env bash
+# Smoke test for scripts/install-agentsh.sh.
+# Runs the script with AGENTSH_DRY_RUN=1 and asserts it picks the right
+# package manager + URL based on AGENTSH_FORCE_DETECT.
+
+set -euo pipefail
+
+here=$(cd "$(dirname "$0")" && pwd)
+script="$here/install-agentsh.sh"
+
+# Test 1: detects dpkg
+out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=dpkg AGENTSH_ARCH=amd64 "$script" 2>&1 || true)
+echo "$out" | grep -q "dpkg.*agentsh_.*_linux_amd64.deb" || {
+  echo "FAIL: dpkg branch missing or wrong URL"
+  echo "----- output -----"
+  echo "$out"
+  exit 1
+}
+
+# Test 2: detects rpm
+out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=rpm AGENTSH_ARCH=amd64 "$script" 2>&1 || true)
+echo "$out" | grep -q "rpm.*agentsh-.*\.x86_64\.rpm" || {
+  echo "FAIL: rpm branch missing or wrong URL"
+  echo "----- output -----"
+  echo "$out"
+  exit 1
+}
+
+# Test 3: detects apk
+out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=apk AGENTSH_ARCH=amd64 "$script" 2>&1 || true)
+echo "$out" | grep -q "apk.*agentsh_.*_linux_amd64.apk" || {
+  echo "FAIL: apk branch missing or wrong URL"
+  echo "----- output -----"
+  echo "$out"
+  exit 1
+}
+
+# Test 4: unknown package manager fails fast
+if AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=none "$script" 2>/dev/null; then
+  echo "FAIL: expected non-zero exit when no package manager detected"
+  exit 1
+fi
+
+# Test 5: arm64 selects arm64 artifact
+out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=dpkg AGENTSH_ARCH=arm64 "$script" 2>&1 || true)
+echo "$out" | grep -q "agentsh_.*_linux_arm64.deb" || {
+  echo "FAIL: arm64 URL not generated"
+  echo "----- output -----"
+  echo "$out"
+  exit 1
+}
+
+# Test 6: pinned tag keeps "v" in the release path, drops it from the file name
+out=$(AGENTSH_DRY_RUN=1 AGENTSH_FORCE_DETECT=dpkg AGENTSH_ARCH=amd64 AGENTSH_VERSION=v1.2.3 "$script" 2>&1 || true)
+echo "$out" | grep -q "releases/download/v1\.2\.3/agentsh_1\.2\.3_linux_amd64\.deb" || {
+  echo "FAIL: pinned version should strip leading v from artifact name"
+  echo "----- output -----"
+  echo "$out"
+  exit 1
+}
+
+echo "OK install-agentsh.sh"