Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,12 @@ This applies to `tracker validate`, `tracker simulate`, and `tracker run` unifor

### Tool node safety — LLM output as shell input
- NEVER `eval` content extracted from LLM-written files (arbitrary command execution)
- Variable expansion in tool_command uses a safe-key allowlist for `ctx.*` keys: only `outcome`, `preferred_label`, `human_response`, `interview_answers` can be interpolated. All `graph.*` and `params.*` keys are always allowed (author-controlled). All LLM-origin `ctx.*` keys (`last_response`, `tool_stdout`, `response.*`, etc.) are blocked.
- The safe pattern: write LLM output to a file in a prior tool node, then read it in the command: `cat .ai/output.json | jq ...`
- Tool command output is capped at 64KB per stream by default (configurable via `output_limit` node attr, hard ceiling 10MB via `--max-output-limit`)
- A built-in denylist blocks common dangerous patterns (eval, pipe-to-shell, curl|sh). Use `--bypass-denylist` to override.
- An optional allowlist (`--tool-allowlist` CLI flag or `tool_commands_allow` graph attr) restricts commands to specific patterns. The allowlist cannot override the denylist.
- Sensitive environment variables (`*_API_KEY`, `*_SECRET`, `*_TOKEN`, `*_PASSWORD`) are stripped from tool subprocesses. Override with `TRACKER_PASS_ENV=1`.
- Always strip comments (`grep -v '^#'`) and blank lines from LLM-generated lists before using as patterns
- Use flexible regex for markdown headers LLMs write (they vary: `##`, `###`, with/without colons)
- Add empty-file guards after extracting content from LLM-written files — fail loudly, don't proceed with empty data
Expand Down
46 changes: 46 additions & 0 deletions agent/exec/env_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,49 @@ func TestLocalPathEscapePrevention(t *testing.T) {
t.Error("expected error for path traversal")
}
}

// TestExecCommandWithLimit_Truncates checks that stdout larger than the
// requested limit is cut down and carries the truncation marker.
func TestExecCommandWithLimit_Truncates(t *testing.T) {
	// The shell pipeline emits 200000 bytes, far more than the 1024-byte cap.
	script := []string{"-c", "yes hello | head -c 200000"}

	localEnv := NewLocalEnvironment(t.TempDir())
	res, err := localEnv.ExecCommandWithLimit(context.Background(), "sh", script, 5*time.Second, 1024)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// The bound leaves headroom above the cap for the appended marker text.
	if got := len(res.Stdout); got > 1100 {
		t.Errorf("stdout len = %d, want <= ~1100", got)
	}
	if hasMarker := strings.Contains(res.Stdout, "...(output truncated"); !hasMarker {
		t.Error("expected truncation marker in stdout")
	}
}

// TestExecCommandWithLimit_NoTruncation checks that output well under the
// limit is returned without any truncation marker.
func TestExecCommandWithLimit_NoTruncation(t *testing.T) {
	script := []string{"-c", "echo hello"}

	localEnv := NewLocalEnvironment(t.TempDir())
	res, err := localEnv.ExecCommandWithLimit(context.Background(), "sh", script, 5*time.Second, 65536)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	if marked := strings.Contains(res.Stdout, "truncated"); marked {
		t.Error("small output should not be truncated")
	}
}

// TestExecCommandWithLimit_CustomEnv checks that an environment passed through
// the optional trailing argument is visible to the subprocess.
func TestExecCommandWithLimit_CustomEnv(t *testing.T) {
	const want = "hello"
	script := []string{"-c", "echo $MY_VAR"}
	procEnv := []string{"MY_VAR=hello"}

	localEnv := NewLocalEnvironment(t.TempDir())
	res, err := localEnv.ExecCommandWithLimit(context.Background(), "sh", script, 5*time.Second, 65536, procEnv)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}

	// echo appends a newline, so compare the trimmed output.
	if got := strings.TrimSpace(res.Stdout); got != want {
		t.Errorf("stdout = %q, want %q", got, want)
	}
}
94 changes: 94 additions & 0 deletions agent/exec/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"os/exec"
"path/filepath"
"strings"
"sync"
"syscall"
"time"
)
Expand Down Expand Up @@ -134,6 +135,99 @@ func (e *LocalEnvironment) ExecCommand(ctx context.Context, command string, args
return result, nil
}

// limitedBuffer caps the amount of data that can be written. When the limit
// is reached, excess data is silently discarded and the truncated flag is set.
// Write and String both take mu, so the two may be called from different
// goroutines.
type limitedBuffer struct {
	mu        sync.Mutex   // guards buf and truncated
	buf       bytes.Buffer // retained output; never grows past limit bytes
	limit     int          // maximum number of bytes kept in buf
	truncated bool         // true once at least one byte has been discarded
}

// Write keeps as much of p as still fits under the limit and drops the rest.
//
// It always reports len(p) bytes written with a nil error, even when part or
// all of p was discarded. Reporting a shorter count would violate the
// io.Writer contract, and a copier such as io.Copy would treat it as
// io.ErrShortWrite and fail the command instead of truncating cleanly.
//
// A zero-length write discards nothing, so it never marks the buffer as
// truncated, even when the buffer is already full.
func (lb *limitedBuffer) Write(p []byte) (int, error) {
	if len(p) == 0 {
		return 0, nil
	}

	lb.mu.Lock()
	defer lb.mu.Unlock()

	remaining := lb.limit - lb.buf.Len()
	if remaining >= len(p) {
		return lb.buf.Write(p)
	}

	// At least one byte of p does not fit: remember that and keep the prefix.
	lb.truncated = true
	if remaining > 0 {
		// bytes.Buffer.Write always returns a nil error, so it is not checked.
		lb.buf.Write(p[:remaining])
	}
	return len(p), nil // report full length to avoid io.ErrShortWrite
}

// String returns the retained bytes as a string. If any data was discarded,
// a trailing marker line naming the byte limit is appended.
func (lb *limitedBuffer) String() string {
	lb.mu.Lock()
	defer lb.mu.Unlock()

	s := lb.buf.String()
	if !lb.truncated {
		return s
	}
	return s + fmt.Sprintf("\n...(output truncated at %d bytes)", lb.limit)
}

// ExecCommandWithLimit runs command with args in the environment's working
// directory and captures stdout and stderr, each capped at outputLimit bytes.
//
// If outputLimit <= 0 both streams are captured in full. The optional env
// argument, when present and non-nil, replaces the subprocess environment;
// otherwise cmd.Env is left unset. The command is bounded by timeout; when the
// deadline (or the parent context) fires, the whole process group is killed.
//
// A non-zero exit status is not an error: it is reported through
// CommandResult.ExitCode with a nil error. The captured output is returned
// alongside every error as well.
func (e *LocalEnvironment) ExecCommandWithLimit(ctx context.Context, command string, args []string, timeout time.Duration, outputLimit int, env ...[]string) (CommandResult, error) {
	runCtx, stop := context.WithTimeout(ctx, timeout)
	defer stop()

	proc := exec.CommandContext(runCtx, command, args...)
	proc.Dir = e.workDir
	// Run the child in its own process group so that cancellation can signal
	// the group (negative PID) rather than only the direct child.
	proc.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	proc.Cancel = func() error {
		return syscall.Kill(-proc.Process.Pid, syscall.SIGKILL)
	}
	proc.WaitDelay = 5 * time.Second

	if len(env) > 0 && env[0] != nil {
		proc.Env = env[0]
	}

	// Both capture flavours expose the same two methods, so the run and
	// result-building logic below is shared between them.
	type capture interface {
		Write(p []byte) (int, error)
		String() string
	}
	var outSink, errSink capture
	if outputLimit > 0 {
		outSink = &limitedBuffer{limit: outputLimit}
		errSink = &limitedBuffer{limit: outputLimit}
	} else {
		outSink = new(bytes.Buffer)
		errSink = new(bytes.Buffer)
	}
	proc.Stdout = outSink
	proc.Stderr = errSink

	runErr := proc.Run()
	result := CommandResult{Stdout: outSink.String(), Stderr: errSink.String()}

	switch {
	case runErr == nil:
		return result, nil
	case runCtx.Err() != nil:
		// The context ended before the command did.
		return result, fmt.Errorf("command timed out after %v", timeout)
	}
	if exitErr, ok := runErr.(*exec.ExitError); ok {
		result.ExitCode = exitErr.ExitCode()
		return result, nil
	}
	return result, runErr
}

// Glob returns file paths matching a pattern relative to the working directory.
func (e *LocalEnvironment) Glob(ctx context.Context, pattern string) ([]string, error) {
fullPattern := filepath.Join(e.workDir, pattern)
Expand Down
27 changes: 27 additions & 0 deletions pipeline/expand.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ import (
"strings"
)

// toolCommandSafeCtxKeys lists the only ctx.* keys allowed in tool_command
// variable expansion. All other ctx.* keys are blocked to prevent LLM output
// injection into shell commands.
//
// ExpandVariables consults this set only when its toolCommandMode flag is
// true. A ctx key that resolves to a value but is missing from this map looks
// up as false, and expansion then fails with an "unsafe variable" error.
// Keys in the graph and params namespaces are not checked against this set.
var toolCommandSafeCtxKeys = map[string]bool{
	"outcome":           true,
	"preferred_label":   true,
	"human_response":    true,
	"interview_answers": true,
}

// ExpandVariables replaces ${namespace.key} patterns with values from the provided sources.
// Supports three namespaces:
// - ctx: runtime context (from PipelineContext)
Expand All @@ -16,6 +26,10 @@ import (
// In lenient mode (strict=false), undefined variables expand to empty string.
// In strict mode (strict=true), undefined variables return an error.
//
// When toolCommandMode is true (optional variadic parameter), only allowlisted
// ctx.* keys can be expanded — all others return an error to prevent LLM output
// injection into shell commands.
//
// Examples:
//
// ${ctx.human_response} → value from PipelineContext
Expand All @@ -27,6 +41,7 @@ func ExpandVariables(
params map[string]string,
graphAttrs map[string]string,
strict bool,
toolCommandMode ...bool,
) (string, error) {
if text == "" {
return text, nil
Expand Down Expand Up @@ -81,6 +96,18 @@ func ExpandVariables(
return "", err
}

// In tool command mode, block unsafe ctx.* keys.
isToolCmd := len(toolCommandMode) > 0 && toolCommandMode[0]
if isToolCmd && found && namespace == "ctx" && !toolCommandSafeCtxKeys[key] {
return "", fmt.Errorf(
"tool_command references unsafe variable ${ctx.%s} — "+
"LLM/tool output cannot be interpolated into shell commands. "+
"Safe ctx keys: outcome, preferred_label, human_response, interview_answers. "+
"Write output to a file in a prior tool node and read it in your command instead",
key,
)
}

if !found {
if strict {
available := availableKeys(namespace, ctx, params, graphAttrs)
Expand Down
73 changes: 73 additions & 0 deletions pipeline/expand_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package pipeline

import (
"strings"
"testing"
)

Expand Down Expand Up @@ -559,3 +560,75 @@ func TestInjectParamsIntoGraph_MixedVariables(t *testing.T) {
t.Errorf("got %q, want %q", result.Nodes["Agent1"].Attrs["prompt"], expected)
}
}

// TestExpandVariables_ToolCommandMode_BlocksLLMOutput checks that, in tool
// command mode, a non-allowlisted ctx key is rejected with an "unsafe
// variable" error while an allowlisted key still expands.
func TestExpandVariables_ToolCommandMode_BlocksLLMOutput(t *testing.T) {
	pctx := NewPipelineContext()
	pctx.Set("last_response", "malicious; rm -rf /")
	pctx.Set("outcome", "success")

	// last_response is not on the allowlist, so expansion must be refused.
	_, blockedErr := ExpandVariables("echo ${ctx.last_response}", pctx, nil, nil, false, true)
	if blockedErr == nil {
		t.Fatal("expected error for tainted key in tool command mode")
	}
	if msg := blockedErr.Error(); !strings.Contains(msg, "unsafe variable") {
		t.Errorf("error = %q, want 'unsafe variable' message", blockedErr)
	}

	// outcome is on the allowlist and expands normally.
	got, err := ExpandVariables("status=${ctx.outcome}", pctx, nil, nil, false, true)
	if err != nil {
		t.Fatalf("unexpected error for safe key: %v", err)
	}
	if want := "status=success"; got != want {
		t.Errorf("result = %q, want %q", got, want)
	}
}

// TestExpandVariables_ToolCommandMode_AllowsHumanResponse checks that the
// allowlisted human_response key still expands in tool command mode.
func TestExpandVariables_ToolCommandMode_AllowsHumanResponse(t *testing.T) {
	const want = "echo user typed this"

	pctx := NewPipelineContext()
	pctx.Set("human_response", "user typed this")

	got, err := ExpandVariables("echo ${ctx.human_response}", pctx, nil, nil, false, true)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != want {
		t.Errorf("result = %q, want %q", got, want)
	}
}

// TestExpandVariables_ToolCommandMode_BlocksResponsePrefix checks that a
// dotted response.* ctx key is rejected in tool command mode.
func TestExpandVariables_ToolCommandMode_BlocksResponsePrefix(t *testing.T) {
	pctx := NewPipelineContext()
	pctx.Set("response.agent1", "LLM output here")

	if _, err := ExpandVariables("echo ${ctx.response.agent1}", pctx, nil, nil, false, true); err == nil {
		t.Fatal("expected error for response.* key in tool command mode")
	}
}

// TestExpandVariables_ToolCommandMode_AllowsGraphAndParams checks that
// graph.* and params.* references expand in tool command mode.
func TestExpandVariables_ToolCommandMode_AllowsGraphAndParams(t *testing.T) {
	const want = "build the app sonnet"

	var (
		pctx  = NewPipelineContext()
		attrs = map[string]string{"goal": "build the app"}
		args  = map[string]string{"model": "sonnet"}
	)

	got, err := ExpandVariables("${graph.goal} ${params.model}", pctx, args, attrs, false, true)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != want {
		t.Errorf("result = %q, want %q", got, want)
	}
}

// TestExpandVariables_NormalMode_AllowsEverything checks that, with tool
// command mode explicitly off, a ctx key outside the allowlist still expands.
func TestExpandVariables_NormalMode_AllowsEverything(t *testing.T) {
	const want = "echo hello world"

	pctx := NewPipelineContext()
	pctx.Set("last_response", "hello world")

	got, err := ExpandVariables("echo ${ctx.last_response}", pctx, nil, nil, false, false)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if got != want {
		t.Errorf("result = %q, want %q", got, want)
	}
}
Loading
Loading