2389-research · clintecker · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026 · Apr 4, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -199,6 +199,12 @@ This applies to `tracker validate`, `tracker simulate`, and `tracker run` unifor
 
 ### Tool node safety — LLM output as shell input
 - NEVER `eval` content extracted from LLM-written files (arbitrary command execution)
+- Variable expansion in `tool_command` uses a safe-key allowlist for `ctx.*` keys: only `outcome`, `preferred_label`, `human_response`, `interview_answers` can be interpolated. All `graph.*` and `params.*` keys are always allowed (author-controlled). All other `ctx.*` keys, including LLM-origin ones (`last_response`, `tool_stdout`, `response.*`, etc.), are blocked.
+- The safe pattern: write LLM output to a file in a prior tool node, then read it in the command: `cat .ai/output.json | jq ...`
+- Tool command output is capped at 64KB per stream by default (configurable via `output_limit` node attr, hard ceiling 10MB via `--max-output-limit`)
+- A built-in denylist blocks common dangerous patterns (eval, pipe-to-shell, curl|sh). Use `--bypass-denylist` to override.
+- An optional allowlist (`--tool-allowlist` CLI flag or `tool_commands_allow` graph attr) restricts commands to specific patterns. The allowlist cannot override the denylist.
+- Sensitive environment variables (`*_API_KEY`, `*_SECRET`, `*_TOKEN`, `*_PASSWORD`) are stripped from tool subprocesses. Override with `TRACKER_PASS_ENV=1`.
 - Always strip comments (`grep -v '^#'`) and blank lines from LLM-generated lists before using as patterns
 - Use flexible regex for markdown headers LLMs write (they vary: `##`, `###`, with/without colons)
 - Add empty-file guards after extracting content from LLM-written files — fail loudly, don't proceed with empty data

diff --git a/agent/exec/env_test.go b/agent/exec/env_test.go
@@ -122,3 +122,49 @@ func TestLocalPathEscapePrevention(t *testing.T) {
 		t.Error("expected error for path traversal")
 	}
 }
+
+func TestExecCommandWithLimit_Truncates(t *testing.T) { // output over the limit is cut and carries a truncation marker
+	env := NewLocalEnvironment(t.TempDir())
+	result, err := env.ExecCommandWithLimit(
+		context.Background(), "sh", []string{"-c", "yes hello | head -c 200000"}, // emits 200000 bytes on stdout
+		5*time.Second, 1024,
+	)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if len(result.Stdout) > 1100 { // slack above the 1024-byte cap for the appended truncation marker
+		t.Errorf("stdout len = %d, want <= ~1100", len(result.Stdout))
+	}
+	if !strings.Contains(result.Stdout, "...(output truncated") {
+		t.Error("expected truncation marker in stdout")
+	}
+}
+
+func TestExecCommandWithLimit_NoTruncation(t *testing.T) { // output under the limit must come back without a marker
+	env := NewLocalEnvironment(t.TempDir())
+	result, err := env.ExecCommandWithLimit(
+		context.Background(), "sh", []string{"-c", "echo hello"},
+		5*time.Second, 65536, // 64 KiB cap, far above what echo prints here
+	)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if strings.Contains(result.Stdout, "truncated") {
+		t.Error("small output should not be truncated")
+	}
+}
+
+func TestExecCommandWithLimit_CustomEnv(t *testing.T) { // the optional env argument becomes the subprocess environment
+	env := NewLocalEnvironment(t.TempDir())
+	customEnv := []string{"MY_VAR=hello"}
+	result, err := env.ExecCommandWithLimit(
+		context.Background(), "sh", []string{"-c", "echo $MY_VAR"},
+		5*time.Second, 65536, customEnv,
+	)
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if strings.TrimSpace(result.Stdout) != "hello" { // TrimSpace drops echo's trailing newline
+		t.Errorf("stdout = %q, want %q", strings.TrimSpace(result.Stdout), "hello")
+	}
+}
diff --git a/agent/exec/local.go b/agent/exec/local.go
@@ -12,6 +12,7 @@ import (
 	"os/exec"
 	"path/filepath"
 	"strings"
+	"sync"
 	"syscall"
 	"time"
 )
@@ -134,6 +135,99 @@ func (e *LocalEnvironment) ExecCommand(ctx context.Context, command string, args
 	return result, nil
 }
 
+// limitedBuffer is a writer that retains at most limit bytes. Once the limit
+// is reached, excess data is silently discarded and the truncated flag is set.
+type limitedBuffer struct {
+	mu        sync.Mutex   // guards buf and truncated
+	buf       bytes.Buffer // retained output
+	limit     int          // maximum number of bytes kept in buf
+	truncated bool         // set once a write has been cut short or dropped
+}
+
+func (lb *limitedBuffer) Write(p []byte) (int, error) { // keeps what fits under limit; reports len(p) even when data is dropped
+	lb.mu.Lock()
+	defer lb.mu.Unlock()
+	remaining := lb.limit - lb.buf.Len() // bytes still allowed before hitting the cap
+	if remaining <= 0 {
+		lb.truncated = true
+		return len(p), nil // cap already reached: drop p entirely
+	}
+	if len(p) > remaining {
+		lb.truncated = true
+		lb.buf.Write(p[:remaining])
+		return len(p), nil // report full length to avoid io.ErrShortWrite
+	}
+	return lb.buf.Write(p)
+}
+
+func (lb *limitedBuffer) String() string { // retained data, plus a marker line if anything was dropped
+	lb.mu.Lock()
+	defer lb.mu.Unlock()
+	s := lb.buf.String()
+	if lb.truncated {
+		s += fmt.Sprintf("\n...(output truncated at %d bytes)", lb.limit) // marker is added on top of the limit bytes
+	}
+	return s
+}
+
+// ExecCommandWithLimit runs a command in the working directory with captured output capped at outputLimit bytes per stream.
+// If outputLimit <= 0, output is unbounded (same as ExecCommand). A non-zero exit is reported via ExitCode with a nil error.
+// Optional env parameter sets the subprocess environment (nil = inherit parent); only the first env value is consulted.
+func (e *LocalEnvironment) ExecCommandWithLimit(ctx context.Context, command string, args []string, timeout time.Duration, outputLimit int, env ...[]string) (CommandResult, error) {
+	ctx, cancel := context.WithTimeout(ctx, timeout)
+	defer cancel()
+
+	cmd := exec.CommandContext(ctx, command, args...)
+	cmd.Dir = e.workDir
+	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} // own process group, so the group can be signalled as a unit
+	cmd.Cancel = func() error {
+		return syscall.Kill(-cmd.Process.Pid, syscall.SIGKILL) // negative pid targets the whole process group
+	}
+	cmd.WaitDelay = 5 * time.Second
+
+	if len(env) > 0 && env[0] != nil {
+		cmd.Env = env[0]
+	}
+
+	if outputLimit <= 0 { // unbounded path: plain buffers, same error handling as the capped path below
+		var stdout, stderr bytes.Buffer
+		cmd.Stdout = &stdout
+		cmd.Stderr = &stderr
+		err := cmd.Run()
+		result := CommandResult{Stdout: stdout.String(), Stderr: stderr.String()}
+		if err != nil {
+			if ctx.Err() != nil { // deadline hit or parent context cancelled; both are reported as a timeout
+				return result, fmt.Errorf("command timed out after %v", timeout)
+			}
+			if exitErr, ok := err.(*exec.ExitError); ok {
+				result.ExitCode = exitErr.ExitCode()
+				return result, nil // non-zero exit is surfaced through ExitCode, not as an error
+			}
+			return result, err
+		}
+		return result, nil
+	}
+
+	stdoutBuf := &limitedBuffer{limit: outputLimit} // each stream gets its own independent cap
+	stderrBuf := &limitedBuffer{limit: outputLimit}
+	cmd.Stdout = stdoutBuf
+	cmd.Stderr = stderrBuf
+
+	err := cmd.Run()
+	result := CommandResult{Stdout: stdoutBuf.String(), Stderr: stderrBuf.String()}
+	if err != nil {
+		if ctx.Err() != nil { // deadline hit or parent context cancelled; both are reported as a timeout
+			return result, fmt.Errorf("command timed out after %v", timeout)
+		}
+		if exitErr, ok := err.(*exec.ExitError); ok {
+			result.ExitCode = exitErr.ExitCode()
+			return result, nil // non-zero exit is surfaced through ExitCode, not as an error
+		}
+		return result, err
+	}
+	return result, nil
+}
+
 // Glob returns file paths matching a pattern relative to the working directory.
 func (e *LocalEnvironment) Glob(ctx context.Context, pattern string) ([]string, error) {
 	fullPattern := filepath.Join(e.workDir, pattern)

diff --git a/pipeline/expand.go b/pipeline/expand.go
@@ -7,6 +7,16 @@ import (
 	"strings"
 )
 
+// toolCommandSafeCtxKeys is the set of ctx.* keys (named without the "ctx."
+// prefix) allowed in tool_command variable expansion. All other ctx.* keys
+// are blocked to prevent LLM output injection into shell commands.
+var toolCommandSafeCtxKeys = map[string]bool{
+	"outcome":           true,
+	"preferred_label":   true,
+	"human_response":    true,
+	"interview_answers": true,
+}
+
 // ExpandVariables replaces ${namespace.key} patterns with values from the provided sources.
 // Supports three namespaces:
 //   - ctx: runtime context (from PipelineContext)
@@ -16,6 +26,10 @@ import (
 // In lenient mode (strict=false), undefined variables expand to empty string.
 // In strict mode (strict=true), undefined variables return an error.
 //
+// When toolCommandMode is true (optional variadic parameter; only the first
+// value is read), any ctx.* key that resolves to a value but is not allowlisted
+// returns an error, preventing LLM output injection into shell commands.
+//
 // Examples:
 //
 //	${ctx.human_response} → value from PipelineContext
@@ -27,6 +41,7 @@ func ExpandVariables(
 	params map[string]string,
 	graphAttrs map[string]string,
 	strict bool,
+	toolCommandMode ...bool,
 ) (string, error) {
 	if text == "" {
 		return text, nil
@@ -81,6 +96,18 @@ func ExpandVariables(
 			return "", err
 		}
 
+		// In tool command mode, block unsafe ctx.* keys.
+		isToolCmd := len(toolCommandMode) > 0 && toolCommandMode[0]
+		if isToolCmd && found && namespace == "ctx" && !toolCommandSafeCtxKeys[key] {
+			return "", fmt.Errorf(
+				"tool_command references unsafe variable ${ctx.%s} — "+
+					"LLM/tool output cannot be interpolated into shell commands. "+
+					"Safe ctx keys: outcome, preferred_label, human_response, interview_answers. "+
+					"Write output to a file in a prior tool node and read it in your command instead",
+				key,
+			)
+		}
+
 		if !found {
 			if strict {
 				available := availableKeys(namespace, ctx, params, graphAttrs)

diff --git a/pipeline/expand_test.go b/pipeline/expand_test.go
@@ -1,6 +1,7 @@
 package pipeline
 
 import (
+	"strings"
 	"testing"
 )
 
@@ -559,3 +560,75 @@ func TestInjectParamsIntoGraph_MixedVariables(t *testing.T) {
 		t.Errorf("got %q, want %q", result.Nodes["Agent1"].Attrs["prompt"], expected)
 	}
 }
+
+func TestExpandVariables_ToolCommandMode_BlocksLLMOutput(t *testing.T) { // non-allowlisted ctx key errors; allowlisted key expands
+	ctx := NewPipelineContext()
+	ctx.Set("last_response", "malicious; rm -rf /")
+	ctx.Set("outcome", "success")
+
+	_, err := ExpandVariables("echo ${ctx.last_response}", ctx, nil, nil, false, true) // strict=false, toolCommandMode=true
+	if err == nil {
+		t.Fatal("expected error for tainted key in tool command mode")
+	}
+	if !strings.Contains(err.Error(), "unsafe variable") {
+		t.Errorf("error = %q, want 'unsafe variable' message", err)
+	}
+
+	result, err := ExpandVariables("status=${ctx.outcome}", ctx, nil, nil, false, true) // "outcome" is on the allowlist
+	if err != nil {
+		t.Fatalf("unexpected error for safe key: %v", err)
+	}
+	if result != "status=success" {
+		t.Errorf("result = %q, want %q", result, "status=success")
+	}
+}
+
+func TestExpandVariables_ToolCommandMode_AllowsHumanResponse(t *testing.T) { // allowlisted human_response expands in tool command mode
+	ctx := NewPipelineContext()
+	ctx.Set("human_response", "user typed this")
+
+	result, err := ExpandVariables("echo ${ctx.human_response}", ctx, nil, nil, false, true) // strict=false, toolCommandMode=true
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if result != "echo user typed this" {
+		t.Errorf("result = %q, want %q", result, "echo user typed this")
+	}
+}
+
+func TestExpandVariables_ToolCommandMode_BlocksResponsePrefix(t *testing.T) { // dotted response.* ctx keys are rejected in tool command mode
+	ctx := NewPipelineContext()
+	ctx.Set("response.agent1", "LLM output here")
+
+	_, err := ExpandVariables("echo ${ctx.response.agent1}", ctx, nil, nil, false, true) // strict=false, toolCommandMode=true
+	if err == nil {
+		t.Fatal("expected error for response.* key in tool command mode")
+	}
+}
+
+func TestExpandVariables_ToolCommandMode_AllowsGraphAndParams(t *testing.T) { // graph.* and params.* still expand in tool command mode
+	ctx := NewPipelineContext()
+	graphAttrs := map[string]string{"goal": "build the app"}
+	params := map[string]string{"model": "sonnet"}
+
+	result, err := ExpandVariables("${graph.goal} ${params.model}", ctx, params, graphAttrs, false, true) // strict=false, toolCommandMode=true
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if result != "build the app sonnet" {
+		t.Errorf("result = %q, want %q", result, "build the app sonnet")
+	}
+}
+
+func TestExpandVariables_NormalMode_AllowsEverything(t *testing.T) { // with toolCommandMode=false, non-allowlisted ctx keys expand as usual
+	ctx := NewPipelineContext()
+	ctx.Set("last_response", "hello world")
+
+	result, err := ExpandVariables("echo ${ctx.last_response}", ctx, nil, nil, false, false) // strict=false, toolCommandMode=false
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if result != "echo hello world" {
+		t.Errorf("result = %q, want %q", result, "echo hello world")
+	}
+}