From 439a385fcd1d76b048bc1334f5a25c568019502c Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Fri, 10 Apr 2026 15:23:36 -0400
Subject: [PATCH 01/11] Add context layer for prompt assembly and tool
 execution gating

Introduces src/context/ with two distinct concerns:

- Static system prompt assembly: buildSystemContext composes discovered
  AGENTS.md/CLAUDE.md instructions, environment snapshot (cwd, shell, git),
  and tool guidance into a single string. Runs once at init so the system
  prompt stays stable for Anthropic prompt caching.

- Dynamic per-step layers: withContext/applyContextLayers wrap any ToolSet
  with ContextLayer gates (beforeExecute) and transforms (afterExecute).
  Ships createExecutionPolicy (plan-mode blocking) and createOutputPolicy
  (truncation, redirection hints, optional disk stash). createPrepareStep
  composes compaction + context-status + plan-mode hints into an AI SDK
  PrepareStepFunction without ever mutating the system prompt.

Also refreshes repo docs:

- Rewrites root AGENTS.md as a contributor guide (dropping ~400 lines of
  consumer API examples that belonged in README) and adds a Core Principles
  section covering type safety, testability, error-return convention,
  and the folder-AGENTS.md documentation invariant.
- Promotes root CLAUDE.md to a symlink -> AGENTS.md, matching the
  convention enforced everywhere else by scripts/check-agents-md.sh.
- Adds src/context/AGENTS.md + CLAUDE.md symlink documenting internal
  architecture, design patterns, and common modifications.
- Adds README section documenting buildSystemContext, layer composition,
  and prepareStep wiring, plus API reference entries for new exports.

Test coverage: 7 new test files under tests/context/ covering layer
wrapping, gate short-circuit, transform pipe, parallel isolation,
output policy truncation/stash, execution policy, prepare step
composition, and end-to-end integration with a real ToolSet.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 .gitignore                             |   3 +
 AGENTS.md                              | 751 +++++++------------------
 CLAUDE.md                              | 650 +--------------------
 README.md                              | 101 ++++
 src/context/AGENTS.md                  | 137 +++++
 src/context/CLAUDE.md                  |   1 +
 src/context/build-context.ts           |  94 ++++
 src/context/environment.ts             |  91 +++
 src/context/execution-policy.ts        |  49 ++
 src/context/index.ts                   | 128 +++++
 src/context/instructions.ts            | 137 +++++
 src/context/output-policy.ts           | 227 ++++++++
 src/context/prepare-step.ts            | 110 ++++
 src/context/tool-guidance.ts           |  46 ++
 src/index.ts                           |  29 +
 src/tools/ask-user.ts                  |   4 +-
 src/tools/index.ts                     |  54 +-
 src/tools/read.ts                      |   3 +-
 src/types.ts                           |  34 +-
 tests/context/build-context.test.ts    | 434 ++++++++++++++
 tests/context/execution-policy.test.ts | 140 +++++
 tests/context/integration.test.ts      | 275 +++++++++
 tests/context/output-policy.test.ts    | 517 +++++++++++++++++
 tests/context/parallel.test.ts         | 175 ++++++
 tests/context/prepare-step.test.ts     | 196 +++++++
 tests/context/with-context.test.ts     | 344 +++++++++++
 tests/tools/read.test.ts               |   5 +-
 27 files changed, 3514 insertions(+), 1221 deletions(-)
 mode change 100644 => 120000 CLAUDE.md
 create mode 100644 src/context/AGENTS.md
 create mode 120000 src/context/CLAUDE.md
 create mode 100644 src/context/build-context.ts
 create mode 100644 src/context/environment.ts
 create mode 100644 src/context/execution-policy.ts
 create mode 100644 src/context/index.ts
 create mode 100644 src/context/instructions.ts
 create mode 100644 src/context/output-policy.ts
 create mode 100644 src/context/prepare-step.ts
 create mode 100644 src/context/tool-guidance.ts
 create mode 100644 tests/context/build-context.test.ts
 create mode 100644 tests/context/execution-policy.test.ts
 create mode 100644 tests/context/integration.test.ts
 create mode 100644 tests/context/output-policy.test.ts
 create mode 100644 tests/context/parallel.test.ts
 create mode 100644 tests/context/prepare-step.test.ts
 create mode 100644 tests/context/with-context.test.ts

diff --git a/.gitignore b/.gitignore
index 80c7ce7..90192a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -177,3 +177,6 @@ dist
 #workflow review
 /todos
 
+# Local reference repos (symlinked by contributors)
+/references
+
diff --git a/AGENTS.md b/AGENTS.md
index d656162..917d6e8 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,658 +1,285 @@
-# AGENTS.md - Using bashkit
+# BashKit — Contributor Guide
 
-bashkit provides agentic coding tools for the Vercel AI SDK. This guide helps AI agents use bashkit when building applications.
+> Agentic coding tools for the Vercel AI SDK.
 
-## Installation
+**Tech Stack**: TypeScript · Bun · Vercel AI SDK · Zod
+**Package**: `bashkit` ([npm](https://www.npmjs.com/package/bashkit) · [GitHub](https://github.com/jbreite/bashkit))
 
-```bash
-npm install bashkit ai @ai-sdk/anthropic
-# or
-pnpm add bashkit ai @ai-sdk/anthropic
-# or
-yarn add bashkit ai @ai-sdk/anthropic
-# or
-bun add bashkit ai @ai-sdk/anthropic
-```
-
-## Quick Setup
-
-### LocalSandbox (Development)
-
-Runs commands directly on the local machine. Use for development/testing only.
-
-```typescript
-import { createAgentTools, createLocalSandbox } from "bashkit";
-
-const sandbox = createLocalSandbox({ cwd: "/tmp/workspace" });
-const { tools } = createAgentTools(sandbox);
-```
-
-### VercelSandbox (Production)
-
-Runs in isolated Firecracker microVMs on Vercel's infrastructure.
-
-```typescript
-import { createAgentTools, createVercelSandbox } from "bashkit";
-
-// Async - automatically installs ripgrep for Grep tool
-const sandbox = await createVercelSandbox({
-  runtime: "node22",
-  resources: { vcpus: 2 },
-  // ensureTools: true (default) - auto-setup ripgrep
-  // ensureTools: false - skip for faster startup if you don't need Grep
-});
-const { tools } = createAgentTools(sandbox);
-
-// Don't forget to cleanup
-await sandbox.destroy();
-```
-
-### E2BSandbox (Production)
-
-Runs in E2B's cloud sandboxes. Requires `@e2b/code-interpreter` peer dependency.
-
-```typescript
-import { createAgentTools, createE2BSandbox } from "bashkit";
-
-// Async - automatically installs ripgrep for Grep tool
-const sandbox = await createE2BSandbox({
-  apiKey: process.env.E2B_API_KEY,
-  // ensureTools: true (default) - auto-setup ripgrep
-  // ensureTools: false - skip for faster startup if you don't need Grep
-});
-const { tools } = createAgentTools(sandbox);
-
-await sandbox.destroy();
-```
-
-### Sandbox Reconnection (Cloud Sandboxes)
-
-Cloud sandboxes (E2B, Vercel) support reconnection via the `id` property and `sandboxId` config:
-
-```typescript
-// Create a new sandbox
-const sandbox = await createE2BSandbox({ apiKey: process.env.E2B_API_KEY });
-
-// Sandbox ID is available immediately after creation
-const sandboxId = sandbox.id; // "sbx_abc123..."
-
-// Store sandboxId in your database (e.g., chat metadata)
-await db.chat.update({ where: { id: chatId }, data: { sandboxId } });
-
-// Later: reconnect to the same sandbox (fast - ripgrep already installed)
-const savedId = chat.sandboxId;
-const reconnected = await createE2BSandbox({
-  apiKey: process.env.E2B_API_KEY,
-  sandboxId: savedId, // Reconnects instead of creating new
-});
-```
-
-This is useful for:
-- Reusing sandboxes across multiple requests in the same conversation
-- Persisting sandbox state between server restarts
-- Reducing sandbox creation overhead
+This file is for **agents and humans working ON bashkit**. For consumer-facing API usage (how to *use* bashkit in an app), see `README.md`. For folder-specific internals, see the `AGENTS.md` inside each `src/*` directory.
 
-## Internal Architecture
+> **Before editing anything inside `src/<folder>/`, read `src/<folder>/AGENTS.md` first.** Every folder has one. They document internal file layout, key exports, data flows, and per-task modification steps. This root file intentionally does not duplicate them — if you only read this file, you are missing half the picture.
 
-For developers working on bashkit internals, each source folder has its own `AGENTS.md`:
-
-- `src/sandbox/AGENTS.md` -- Execution environment abstractions
-- `src/tools/AGENTS.md` -- Tool implementations
-- `src/cache/AGENTS.md` -- Tool result caching
-- `src/middleware/AGENTS.md` -- AI SDK middleware
-- `src/utils/AGENTS.md` -- Utility functions
-- `src/skills/AGENTS.md` -- Agent Skills support
-- `src/setup/AGENTS.md` -- Environment setup
-- `src/cli/AGENTS.md` -- CLI initialization
-
-See also `CLAUDE.md` for development workflow and conventions.
-
-## Available Tools
-
-### Default Tools (always included)
-
-| Tool | Purpose | Key Inputs |
-|------|---------|------------|
-| `Bash` | Execute shell commands | `command`, `timeout`, `description` |
-| `Read` | Read files or list directories | `file_path`, `offset`, `limit` |
-| `Write` | Create/overwrite files | `file_path`, `content` |
-| `Edit` | Replace strings in files | `file_path`, `old_string`, `new_string`, `replace_all` |
-| `Glob` | Find files by pattern | `pattern`, `path` |
-| `Grep` | Search file contents | `pattern`, `path`, `output_mode`, `-i`, `-C` |
-
-> **Note on nullable types:** Optional parameters use `T | null` (not `T | undefined`) for OpenAI structured outputs compatibility. AI models should send explicit `null` for parameters they don't want to set. This works with both OpenAI and Anthropic models.
-
-### Optional Tools (via config)
-
-| Tool | Purpose | Config Key |
-|------|---------|------------|
-| `AskUser` | Ask user clarifying questions | `askUser: true` |
-| `EnterPlanMode` | Enter planning/exploration mode | `planMode: true` |
-| `ExitPlanMode` | Exit planning mode with a plan | `planMode: true` |
-| `Skill` | Execute skills | `skill: { skills }` |
-| `WebSearch` | Search the web | `webSearch: { apiKey }` |
-| `WebFetch` | Fetch URL and process with AI | `webFetch: { apiKey, model }` |
-
-### Workflow Tools (created separately)
-
-| Tool | Purpose | Factory |
-|------|---------|---------|
-| `Task` | Spawn sub-agents | `createTaskTool({ model, tools, subagentTypes? })` |
-| `TodoWrite` | Track task progress | `createTodoWriteTool(state, config?, onUpdate?)` |
-
-### Web Tools (require `parallel-web` peer dependency)
+---
 
-| Tool | Purpose | Factory |
-|------|---------|---------|
-| `WebSearch` | Search the web | `createWebSearchTool({ apiKey })` |
-| `WebFetch` | Fetch URL and process with AI | `createWebFetchTool({ apiKey, model })` |
+## Core Principles
 
-## Using with AI SDK generateText
+These apply to every PR, no exceptions:
 
-```typescript
-import { generateText, wrapLanguageModel, stepCountIs } from "ai";
-import { anthropic } from "@ai-sdk/anthropic";
-import {
-  createAgentTools,
-  createLocalSandbox,
-  anthropicPromptCacheMiddleware,
-} from "bashkit";
+1. **Fully typed.** No `any`. Use `unknown` at untrusted boundaries and narrow with guards. Public APIs must have explicit return types — don't rely on inference for exports. Tool input/output shapes live in Zod schemas + exported TypeScript interfaces that stay in sync.
+2. **Testable and tested.** Every public export has a test. Tests mirror `src/` layout in `tests/`. Bug fixes include a regression test. If a change is hard to test, refactor until it isn't.
+3. **Typecheck and lint before pushing.** `bun run typecheck && bun run check && bun run test` must be green locally. CI will reject otherwise.
+4. **Return errors, don't throw.** Tools return `{ error: string }` objects so the model can see the failure. Only sandbox-layer code throws, and tools catch it.
+5. **Config-driven, not flag-driven.** Optional features are enabled by the *presence* of a config object (e.g. `webSearch: { apiKey }`), not by boolean flags. Defaults live in factories via `config?.field ?? default`.
+6. **No breaking changes without a major bump.** See the Breaking Change Surface section below before touching the `Sandbox` interface, tool schemas, tool names, `ContextLayer`, or `createAgentTools` return shape.
+7. **Docs live next to code.** When you change files in a folder, update that folder's `AGENTS.md` in the same PR.
 
-const sandbox = createLocalSandbox({ cwd: "/tmp/workspace" });
-const { tools } = createAgentTools(sandbox);
+---
 
-// Wrap model with prompt caching (recommended)
-const model = wrapLanguageModel({
-  model: anthropic("claude-sonnet-4-20250514"),
-  middleware: anthropicPromptCacheMiddleware,
-});
+## References
 
-const result = await generateText({
-  model,
-  tools,
-  system: "You are a helpful coding assistant.",
-  prompt: "Create a hello world TypeScript file and run it",
-  stopWhen: stepCountIs(10), // Allow up to 10 tool-call rounds
-  onStepFinish: ({ finishReason, toolCalls, toolResults, usage }) => {
-    // Log progress
-    console.log(`Step finished: ${finishReason}`);
-    for (const call of toolCalls || []) {
-      console.log(`  Tool: ${call.toolName}`);
-    }
-  },
-});
+If a `references/` directory exists at the project root, search it for implementation patterns when building new features. It is gitignored — contributors symlink or clone repos locally.
 
-await sandbox.destroy();
-```
+- `references/codex` — OpenAI Codex CLI. Tool designs, agent loop, sandboxing patterns.
+- `references/pi-mono` — pi-mono monorepo. See `packages/coding-agent` for agent loop patterns.
 
-## Sub-agents with Task Tool
-
-The Task tool spawns new agents for complex subtasks:
-
-```typescript
-import { createTaskTool } from "bashkit";
-
-const taskTool = createTaskTool({
-  model: anthropic("claude-sonnet-4-20250514"),
-  tools: sandboxTools,
-  subagentTypes: {
-    research: {
-      model: anthropic("claude-haiku-3"), // Cheaper model for research
-      systemPrompt: "You are a research specialist. Find information only.",
-      tools: ["Read", "Grep", "Glob"], // Limited tools
-    },
-    coding: {
-      systemPrompt: "You are a coding expert. Write clean code.",
-      tools: ["Read", "Write", "Edit", "Bash"],
-    },
-  },
-});
+---
 
-// Add to tools
-const allTools = { ...sandboxTools, Task: taskTool };
-```
+## Code Organization
 
-The parent agent calls Task like any other tool:
-```typescript
-// Agent decides to delegate:
-{ tool: "Task", args: {
-  description: "Research API patterns",
-  prompt: "Find best practices for REST APIs",
-  subagent_type: "research"
-}}
 ```
-
-### Streaming Sub-agent Activity to UI
-
-Pass a `streamWriter` to stream real-time sub-agent activity:
-
-```typescript
-import { createUIMessageStream } from "ai";
-
-const stream = createUIMessageStream({
-  execute: async ({ writer }) => {
-    const taskTool = createTaskTool({
-      model,
-      tools: sandboxTools,
-      streamWriter: writer, // Enable real-time streaming
-      subagentTypes: { ... },
-    });
-
-    const result = streamText({
-      model,
-      tools: { Task: taskTool },
-      ...
-    });
-
-    writer.merge(result.toUIMessageStream());
-  },
-});
+src/
+├── sandbox/       # Execution environments (Local, Vercel, E2B) — src/sandbox/AGENTS.md
+├── tools/         # Tool implementations — src/tools/AGENTS.md
+├── context/       # Prompt assembly + tool execution layers — src/context/AGENTS.md
+├── cache/         # Tool result caching (LRU, Redis) — src/cache/AGENTS.md
+├── middleware/    # AI SDK language model middleware — src/middleware/AGENTS.md
+├── utils/         # Budget, compaction, context status, helpers — src/utils/AGENTS.md
+├── skills/        # Agent Skills standard — src/skills/AGENTS.md
+├── setup/         # Agent environment setup (sandbox bootstrapping) — src/setup/AGENTS.md
+├── cli/           # CLI initialization — src/cli/AGENTS.md
+├── types.ts       # AgentConfig, ToolConfig, DEFAULT_CONFIG
+└── index.ts       # Barrel re-exports (public API surface)
 ```
 
-When `streamWriter` is provided:
-- Uses `streamText` internally (instead of `generateText`)
-- Emits `data-subagent` events: `start`, `tool-call`, `done`, `complete`
-- Events appear in `message.parts` as `{ type: "data-subagent", data: SubagentEventData }`
+**Each folder has its own `AGENTS.md`** with file listings, exports, internal architecture, and per-task modification guides.
 
-**Note:** TaskOutput does NOT include messages (to avoid context bloat). The UI accesses the full conversation via the streamed `complete` event.
+### AGENTS.md Conventions (enforced in CI)
 
-## Prompt Caching
+- Every folder under `src/` **must** have an `AGENTS.md`. When you add a new folder, add one.
+- Every `AGENTS.md` **must** have a co-located `CLAUDE.md` symlink pointing to it. The CI check exempts only the root, whose `CLAUDE.md` is nevertheless also a symlink to `AGENTS.md`.
+- Automation: `bun run link-agents` creates missing symlinks; `bun run check:agents` fails CI if any are missing.
+- When you **add, remove, or significantly change** files in a folder, update that folder's `AGENTS.md` in the same PR. Stale folder docs are worse than no docs.
 
-Enable Anthropic prompt caching to reduce costs on repeated prefixes:
+---
 
-```typescript
-import { wrapLanguageModel } from "ai";
-import { anthropicPromptCacheMiddleware } from "bashkit";
+## Development Workflow
 
-const model = wrapLanguageModel({
-  model: anthropic("claude-sonnet-4-20250514"),
-  middleware: anthropicPromptCacheMiddleware,
-});
+### Build & Typecheck
 
-// Check cache stats in result
-console.log({
-  cacheCreation: result.providerMetadata?.anthropic?.cacheCreationInputTokens,
-  cacheRead: result.providerMetadata?.anthropic?.cacheReadInputTokens,
-});
+```bash
+bun install
+bun run typecheck   # ALWAYS run before bun run build
+bun run build       # Bun bundles to dist/index.js + tsc emits .d.ts
 ```
 
-## Web Tools
+**Critical**: `bun run build` does **not** fail on type errors during bundling. Run `bun run typecheck` first or type regressions will ship silently.
 
-WebSearch and WebFetch tools provide web access capabilities using the [Parallel API](https://docs.parallel.ai).
+### Tests
 
-### Setup
+Use Vitest via `bun run test` — **not** `bun test` (which runs Bun's built-in runner and will miss our suite).
 
 ```bash
-# Install the parallel-web peer dependency
-bun add parallel-web
-
-# Set your API key
-export PARALLEL_API_KEY="your_api_key"
+bun run test                              # all tests
+bun run test tests/utils/budget.test.ts   # single file
+bun run test:watch                        # watch mode
+bun run test:coverage                     # with coverage
 ```
 
-### WebSearch
+Tests live in `tests/<folder>/` mirroring `src/<folder>/`. Examples in `/examples/` serve as integration tests and require sandbox/API-key env vars.
 
-Search the web and get formatted results:
+**Everything non-trivial ships with tests.** New tools, new context layers, new utilities, new sandbox methods — all get unit tests before merging. Bug fixes include a regression test that would have caught the bug. If you can't easily test something, that's a signal the abstraction is wrong, not a reason to skip the test.
 
-```typescript
-import { createWebSearchTool } from "bashkit";
+### Lint & Format
 
-const webSearch = createWebSearchTool({
-  apiKey: process.env.PARALLEL_API_KEY!,
-});
-
-// Add to your tools
-const tools = {
-  ...sandboxTools,
-  WebSearch: webSearch,
-};
-```
+Biome handles both:
 
-**Input:**
-- `query` - The search query
-- `allowed_domains?` - Only include results from these domains
-- `blocked_domains?` - Exclude results from these domains
-
-**Output:**
-```typescript
-{
-  results: Array<{ title: string; url: string; snippet: string; metadata?: Record<string, any> }>;
-  total_results: number;
-  query: string;
-}
+```bash
+bun run check       # lint + format, auto-fix
+bun run check:ci    # lint + format, no writes (CI gate)
+bun run format      # format only
+bun run lint        # lint only
 ```
 
-### WebFetch
+Run `bun run check` before pushing. CI runs `check:ci`, `typecheck`, `test`, and `check:agents` — all four must pass.
 
-Fetch a URL and process the content with an AI model:
+### Commits & PRs
 
-```typescript
-import { createWebFetchTool } from "bashkit";
-import { anthropic } from "@ai-sdk/anthropic";
+- Commits are small, imperative, sentence-case: `Add budget tracking`, `Refactor AskUser tool to deferred client-rendered model`, `Fix lint and typecheck CI failures`.
+- One logical change per commit. Keep refactors separate from feature work.
+- PR titles follow the same style as commits. PR descriptions should explain *why*, link relevant issues, and call out any public API changes.
+- CI gates: `typecheck`, `check:ci` (Biome), `test`, `check:agents`. All four must pass before merge.
 
-const webFetch = createWebFetchTool({
-  apiKey: process.env.PARALLEL_API_KEY!,
-  model: anthropic("claude-haiku-3"), // Use a fast/cheap model for processing
-});
+### Local Iteration Loop
 
-// Add to your tools
-const tools = {
-  ...sandboxTools,
-  WebFetch: webFetch,
-};
-```
+Use `LocalSandbox` (Bun APIs, no network) for fast iteration. Swap to `VercelSandbox` / `E2BSandbox` when you need to verify production behavior.
 
-**Input:**
-- `url` - The URL to fetch
-- `prompt` - The prompt to run on the fetched content
-
-**Output:**
-```typescript
-{
-  response: string;      // AI model's response to the prompt
-  url: string;
-  final_url?: string;    // Final URL after redirects
-  status_code?: number;
-}
+```bash
+bun examples/test-tools.ts                # direct tool calls, no AI
+ANTHROPIC_API_KEY=xxx bun examples/basic.ts  # full agentic loop
 ```
 
-## Agent Skills
-
-bashkit supports the [Agent Skills](https://agentskills.io) standard for progressive skill loading.
-
-> **Note:** Skill discovery is for **LocalSandbox** use cases where the agent has filesystem access. For cloud sandboxes, bundle skills with your app directly.
-
-### Discovering Skills (LocalSandbox)
-
-When using LocalSandbox, discover project and user-global skills:
+---
 
-```typescript
-import { discoverSkills, skillsToXml } from "bashkit";
+## Code Conventions
 
-// Discovers from .skills/ (project) and ~/.bashkit/skills/ (user-global)
-const skills = await discoverSkills();
-```
+### Naming
 
-### Using Skills with Agents
+| Element | Convention | Examples |
+|---|---|---|
+| Tool names | PascalCase | `Bash`, `Read`, `WebSearch` |
+| Factories | `createX` | `createBashTool`, `createLocalSandbox` |
+| Output types | `XOutput` | `BashOutput`, `ReadOutput` |
+| Error types | `XError` | `BashError`, `ReadError` |
+| Config types | `XConfig` | `ToolConfig`, `AgentConfig` |
+| Files | kebab-case | `bash.ts`, `anthropic-cache.ts` |
 
-```typescript
-import { discoverSkills, skillsToXml, createAgentTools, createLocalSandbox } from "bashkit";
-import { generateText, stepCountIs } from "ai";
-import { anthropic } from "@ai-sdk/anthropic";
+### Type Organization
 
-const skills = await discoverSkills();
-const sandbox = createLocalSandbox({ cwd: "/tmp/workspace" });
-const { tools } = createAgentTools(sandbox);
+- **Input schemas**: colocated with tool implementation (`src/tools/bash.ts` defines `bashInputSchema`).
+- **Output/Error types**: exported from the tool file; tools return `Output | Error` unions.
+- **Config types**: centralized in `src/types.ts`.
+- **Error handling**: tools **return** `{ error: string }` objects — they do not throw. Sandbox methods may throw; tools catch them.
 
-const result = await generateText({
-  model: anthropic("claude-sonnet-4-20250514"),
-  tools,
-  system: `You are a coding assistant.
+### `.nullable()` over `.optional()` for tool inputs
 
-${skillsToXml(skills)}
+All optional tool parameters use `.nullable()`, **not** `.optional()`. OpenAI structured outputs require every property to be listed in the schema's `required` array; `.optional()` drops the property from `required`, which OpenAI rejects. `.nullable()` keeps the property required but allows `null`, and works on both Anthropic and OpenAI.
 
-When a task matches a skill, use the Read tool to load its full instructions from the location path.`,
-  prompt: "Extract text from invoice.pdf",
-  stopWhen: stepCountIs(10),
+```ts
+const schema = z.object({
+  timeout: z.number().nullable(),
+  replace_all: z.boolean().nullable(),
 });
-```
 
-### How It Works
-
-1. `discoverSkills()` loads only metadata (name, description, path) - ~50-100 tokens per skill
-2. `skillsToXml()` generates XML listing available skills
-3. Agent decides when to activate a skill by reading its SKILL.md with the Read tool
-4. Full instructions enter context only when the skill is actually used
-
-### Creating Skills
-
-Create `.skills/<skill-name>/SKILL.md`:
-
-```markdown
----
-name: pdf-processing
-description: Extract text and tables from PDF files.
----
-
-# PDF Processing
-
-Instructions for the agent...
+// Destructuring defaults (= value) only fire on undefined, NOT null.
+// Always use ?? for defaults with nullable fields:
+const { timeout, replace_all: rawReplaceAll } = input;
+const effectiveTimeout = timeout ?? 120000;
+const replaceAll = rawReplaceAll ?? false;
 ```
 
-### Using Remote Skills
+### Configuration Pattern
 
-Fetch complete skill folders from GitHub repositories (e.g., Anthropic's official skills):
+Tool factories accept an optional `ToolConfig` and merge with defaults inline:
 
-```typescript
-import { fetchSkill, fetchSkills, setupAgentEnvironment } from "bashkit";
-
-// Fetch a single skill (gets all files: SKILL.md, scripts/, etc.)
-const pdfSkill = await fetchSkill('anthropics/skills/pdf');
-
-// Or batch fetch multiple
-const remoteSkills = await fetchSkills([
-  'anthropics/skills/pdf',
-  'anthropics/skills/web-research',
-]);
-
-// Use with setupAgentEnvironment
-const config = {
-  skills: {
-    ...remoteSkills,
-    'my-custom': myContent,
-  },
-};
-const { skills } = await setupAgentEnvironment(sandbox, config);
+```ts
+export function createBashTool(sandbox: Sandbox, config?: ToolConfig) {
+  const timeout = config?.timeout ?? 120000;
+  // ...
+}
 ```
 
-**Format:** `owner/repo/skillName` (fetches entire skill folder from GitHub)
+Optional features (WebSearch, WebFetch, cache, budget, context layers) are enabled by **config presence** in `createAgentTools` — don't gate them on feature flags.
 
-## Setting Up Agent Environments
-
-For cloud sandboxes, use `setupAgentEnvironment` to create workspace directories and seed skills:
-
-```typescript
-import { setupAgentEnvironment, skillsToXml, createAgentTools, createVercelSandbox } from "bashkit";
+---
 
-const config = {
-  workspace: {
-    notes: 'files/notes/',
-    outputs: 'files/outputs/',
-  },
-  skills: {
-    'web-research': webResearchSkillContent,
-  },
-};
+## Core Abstractions
 
-const sandbox = await createVercelSandbox({});
-const { skills } = await setupAgentEnvironment(sandbox, config);
+### Sandbox Interface
 
-// Use same config in prompt - stays in sync!
-const systemPrompt = `Save notes to: ${config.workspace.notes}
-${skillsToXml(skills)}
-`;
+All tools depend on `Sandbox` from `src/sandbox/interface.ts`, not concrete implementations. Adding a required method is a breaking change for every implementer.
 
-const { tools } = createAgentTools(sandbox);
+```ts
+interface Sandbox {
+  exec(command: string, options?: ExecOptions): Promise<ExecResult>;
+  readFile(path: string): Promise<string>;
+  writeFile(path: string, content: string): Promise<void>;
+  readDir(path: string): Promise<string[]>;
+  fileExists(path: string): Promise<boolean>;
+  isDirectory(path: string): Promise<boolean>;
+  destroy(): Promise<void>;
+  readonly id?: string;   // for cloud reconnection
+  rgPath?: string;        // set by ensureSandboxTools
+}
 ```
 
-## Common Patterns
-
-### Full Agent Setup
-
-```typescript
-import { generateText, wrapLanguageModel, stepCountIs } from "ai";
-import { anthropic } from "@ai-sdk/anthropic";
-import {
-  createAgentTools,
-  createTaskTool,
-  createTodoWriteTool,
-  createLocalSandbox,
-  anthropicPromptCacheMiddleware,
-  type TodoState,
-} from "bashkit";
-
-// 1. Create sandbox
-const sandbox = createLocalSandbox({ cwd: "/tmp/workspace" });
+`createVercelSandbox()` and `createE2BSandbox()` are **async** and auto-run `ensureSandboxTools` to install ripgrep so `Grep` works immediately. `createLocalSandbox()` is sync.
 
-// 2. Create sandbox tools
-const { tools: sandboxTools } = createAgentTools(sandbox);
-
-// 3. Create model with caching
-const model = wrapLanguageModel({
-  model: anthropic("claude-sonnet-4-20250514"),
-  middleware: anthropicPromptCacheMiddleware,
-});
-
-// 4. Create workflow tools
-const todoState: TodoState = { todos: [] };
-const todoTool = createTodoWriteTool(todoState);
-const taskTool = createTaskTool({ model, tools: sandboxTools });
-
-// 5. Combine all tools
-const tools = {
-  ...sandboxTools,
-  TodoWrite: todoTool,
-  Task: taskTool,
-};
-
-// 6. Run agent
-const result = await generateText({
-  model,
-  tools,
-  system: "You are a coding assistant. Use TodoWrite to plan tasks.",
-  prompt: "Build a REST API with Express",
-  stopWhen: stepCountIs(15),
-});
+### Context Layer
 
-// 7. Cleanup
-await sandbox.destroy();
-```
+`src/context/` provides two separate concerns:
 
-### Tool Configuration
-
-Restrict tools with configuration:
-
-```typescript
-const { tools } = createAgentTools(sandbox, {
-  tools: {
-    Bash: {
-      enabled: true,
-      blockedCommands: ["rm -rf", "sudo"],
-      maxOutputLength: 30000,
-    },
-    Write: {
-      enabled: true,
-      allowedPaths: ["/tmp/workspace"],
-      maxFileSize: 1_000_000,
-    },
-  },
-});
-```
+1. **Static system prompt assembly** (`buildSystemContext`) — discovers `AGENTS.md` / `CLAUDE.md` files, collects environment info (cwd, git branch, platform), builds tool guidance. Called **once at init**, must stay stable across turns for Anthropic prompt caching.
+2. **Dynamic per-step layers** (`withContext`, `applyContextLayers`, `createExecutionPolicy`, `createOutputPolicy`) — intercept every tool call (`beforeExecute` gate, `afterExecute` transform). `createPrepareStep` composes compaction + context-status + plan-mode hints into an AI SDK `prepareStep` callback.
 
-## Tool Result Caching
+Never mutate `system` from `prepareStep` — it will break prompt caching. Dynamic hints go in `messages` as user content.
 
-Cache tool execution results to avoid redundant operations:
+### Tool Composition
 
-```typescript
-import { createAgentTools, createLocalSandbox } from "bashkit";
+`createAgentTools(sandbox, config)` is the single entry point that wires tools + cache + budget + context layers from a config object. Everything else is either internal or a lower-level primitive.
 
-const sandbox = createLocalSandbox({ cwd: "/tmp/workspace" });
+---
 
-// Enable caching with defaults (LRU, 5min TTL)
-const { tools } = createAgentTools(sandbox, { cache: true });
+## Component Interactions
 
-// Or customize caching behavior
-const { tools } = createAgentTools(sandbox, {
-  cache: {
-    ttl: 10 * 60 * 1000,  // 10 minutes
-    debug: true,          // Log cache hits/misses
-    Read: true,           // Enable for Read
-    Glob: true,           // Enable for Glob
-    Grep: false,          // Disable for Grep
-  },
-});
 ```
-
-**Default cached tools:** Read, Glob, Grep, WebFetch, WebSearch
-
-**Not cached by default:** Bash, Write, Edit (have side effects)
-
-### Cache Callbacks
-
-Track cache performance with callbacks:
-
-```typescript
-const { tools } = createAgentTools(sandbox, {
-  cache: {
-    onHit: (toolName, key) => {
-      metrics.increment(`cache.hit.${toolName}`);
-    },
-    onMiss: (toolName, key) => {
-      metrics.increment(`cache.miss.${toolName}`);
-    },
-  },
-});
+User code → Vercel AI SDK → Tool (wrapped w/ context layers + cache)
+                               ↓
+                            Sandbox interface
+                               ↓
+              ┌────────────────┼────────────────┐
+              ↓                ↓                ↓
+         LocalSandbox    VercelSandbox      E2BSandbox
+              ↓                ↓                ↓
+           Bun APIs      Firecracker VM     E2B service
 ```
 
-### Cache Stats
-
-Cached tools have additional methods:
-
-```typescript
-import type { CachedTool } from "bashkit";
-
-const readTool = tools.Read as CachedTool;
+---
 
-// Check cache performance (async for Redis compatibility)
-console.log(await readTool.getStats());
-// { hits: 5, misses: 2, hitRate: 0.71, size: 2 }
+## Dependencies
 
-// Clear cache
-await readTool.clearCache();        // Clear all
-await readTool.clearCache("key");   // Clear specific entry
-```
+**Required peer deps**: `ai` ^5.0.0, `zod` ^4.1.8.
 
-### Redis Cache Store
+**Optional peer deps** — users pick their execution environment:
+- `@vercel/sandbox` ^1.0.0 — Vercel Firecracker isolation
+- `@e2b/code-interpreter` ^1.0.0 — E2B hosted execution
+- `parallel-web` ^1.0.0 — WebSearch / WebFetch backend
 
-Use your existing Redis client with the helper:
+All deps are marked **external** at build time so consumers don't get a duplicated `ai`/`zod` bundle.
 
-```typescript
-import { createRedisCacheStore, createAgentTools } from "bashkit";
+---
 
-const store = createRedisCacheStore(myRedisClient);
-const { tools } = createAgentTools(sandbox, { cache: store });
-```
+## Breaking Change Surface
 
-Works with `redis`, `ioredis`, or any client with `get`, `set`, `del`, `keys` methods. TTL is handled by the wrapper for consistent behavior across all cache backends.
+Anything in this list requires a **major version bump**:
 
-### Custom Cache Store
+1. **`Sandbox` interface** (`src/sandbox/interface.ts`) — adding methods breaks every implementer.
+2. **Tool input schemas** — AI models see these in prompts; removing or renaming fields breaks live integrations.
+3. **Tool output/error shapes** — consumers pattern-match on them.
+4. **Tool names** — they appear verbatim in prompts ("use the Bash tool").
+5. **`ContextLayer` signature** (`src/context/index.ts`) — changes ripple through every custom layer downstream.
+6. **`SystemContext` shape** (`src/context/build-context.ts`) — consumers read individual sections.
+7. **`createAgentTools` return shape** — `AgentToolsResult` is a public contract.
 
-For other backends, implement the `CacheStore` interface:
+Safe in minor/patch:
+- Adding new optional config fields
+- Adding new tools or sandbox implementations
+- Internal refactors that preserve public API
+- Bug fixes
 
-```typescript
-import type { CacheStore } from "bashkit";
+---
 
-const myStore: CacheStore = {
-  get(key) { /* return CacheEntry or undefined */ },
-  set(key, entry) { /* store entry */ },
-  delete(key) { /* remove entry */ },
-  clear() { /* remove all entries */ },
-  size() { /* optional: return count */ },
-};
+## Security Reminders
 
-const { tools } = createAgentTools(sandbox, { cache: myStore });
-```
+The `Bash` tool executes arbitrary commands inside the sandbox — that's the whole point, but it means production deployments **must**:
 
-### Standalone Caching
+- Run inside a real sandbox (Vercel or E2B), not LocalSandbox.
+- Set `blockedCommands` + `timeout` on `Bash`.
+- Set `allowedPaths` on `Read` / `Write` / `Edit`.
+- Set `maxFileSize` on `Write`.
+- Never expose the raw agent loop to untrusted users without an additional auth layer.
 
-Wrap individual tools with caching:
+See `src/tools/AGENTS.md` for per-tool config details.
 
-```typescript
-import { cached, LRUCacheStore } from "bashkit";
+---
 
-const cachedTool = cached(myTool, "MyTool", {
-  ttl: 60000,       // 1 minute
-  debug: true,      // Log cache activity
-  store: new LRUCacheStore(500),  // Max 500 entries
-});
-```
+## Common Implementation Tasks
+
+| Task | Where to start |
+|---|---|
+| Add a new tool | `src/tools/AGENTS.md` → "Common Modifications" |
+| Add a new sandbox | `src/sandbox/AGENTS.md` → "Common Modifications" |
+| Add middleware | `src/middleware/AGENTS.md` → "Common Modifications" |
+| Add a cache backend | `src/cache/AGENTS.md` → "Common Modifications" |
+| Add a context layer or prompt section | `src/context/AGENTS.md` → "Common Modifications" |
+| Add a skill source | `src/skills/AGENTS.md` → "Common Modifications" |
+| Add a config field | Define in `src/types.ts`, consume in the relevant factory via `config?.yourField ?? default` |
diff --git a/CLAUDE.md b/CLAUDE.md
deleted file mode 100644
index a0c169a..0000000
--- a/CLAUDE.md
+++ /dev/null
@@ -1,649 +0,0 @@
-# BashKit - Claude Code Guide
-
-> Agentic coding tools for Vercel AI SDK
-
-**Tech Stack**: TypeScript • Bun • Vercel AI SDK • Zod
-**Inspired by**: Claude Code tools
-**Version**: 0.4.0
-
----
-
-## Project Overview
-
-### What BashKit Solves
-
-BashKit provides a comprehensive toolkit for building AI coding agents using the Vercel AI SDK. It bridges the gap between AI models like Claude and actual code execution environments, enabling agents to:
-
-- Execute bash commands
-- Read, write, and edit files
-- Search codebases with glob/grep
-- Fetch web content and perform searches
-- Spawn sub-agents for complex tasks
-- Manage state with todo lists
-
-### Key Features
-
-**10 Tools Available**:
-- **Bash** - Execute shell commands with timeout control
-- **Read** - Read files and list directories
-- **Write** - Create or overwrite files
-- **Edit** - Replace strings in existing files
-- **Glob** - Find files by pattern matching
-- **Grep** - Search file contents with regex
-- **WebSearch** - Web search with domain filtering
-- **WebFetch** - Fetch and analyze web URLs
-- **Task** - Spawn sub-agents for complex work
-- **TodoWrite** - Manage structured task lists
-
-### Architecture Philosophy
-
-1. **Bring Your Own Sandbox** - Start with LocalSandbox, swap to Vercel/E2B for production
-2. **Type-Safe** - Full TypeScript with proper inference
-3. **Configurable** - Security controls and limits at the tool level
-4. **Composable** - Tools work together seamlessly
-5. **Claude Code Compatible** - Tool signatures match Claude Code patterns
-
-### Use Cases
-
-- AI coding assistants and agents
-- Automated development workflows
-- Interactive code exploration tools
-- Educational coding environments
-- CI/CD automation with AI
-
----
-
-## Architecture & Patterns
-
-### Code Organization
-
-```
-src/
-├── sandbox/         # Execution environment abstractions (see src/sandbox/AGENTS.md)
-├── tools/           # Tool implementations (see src/tools/AGENTS.md)
-├── cache/           # Tool result caching (see src/cache/AGENTS.md)
-├── middleware/      # Vercel AI SDK middleware (see src/middleware/AGENTS.md)
-├── utils/           # Utility functions (see src/utils/AGENTS.md)
-├── skills/          # Agent Skills standard (see src/skills/AGENTS.md)
-├── setup/           # Agent environment setup (see src/setup/AGENTS.md)
-├── cli/             # CLI initialization (see src/cli/AGENTS.md)
-├── types.ts         # Configuration types
-└── index.ts         # Main exports (barrel file)
-```
-
-Each folder has its own `AGENTS.md` with detailed file descriptions, key exports, architecture, and modification guides.
-
-### Key Design Patterns
-
-#### 1. Factory Pattern
-All tools and sandboxes created via factory functions:
-```typescript
-const sandbox = createLocalSandbox({ workingDirectory: '/tmp' });
-const { tools } = await createAgentTools(sandbox, config);
-```
-
-#### 2. Sandbox Abstraction
-Tools depend on the `Sandbox` interface, not specific implementations:
-```typescript
-interface Sandbox {
-  exec(command: string, options?: ExecOptions): Promise<ExecResult>;
-  readFile(path: string): Promise<string>;
-  writeFile(path: string, content: string): Promise<void>;
-  readDir(path: string): Promise<string[]>;
-  fileExists(path: string): Promise<boolean>;
-  isDirectory(path: string): Promise<boolean>;
-  destroy(): Promise<void>;
-  readonly id?: string;  // Sandbox ID for reconnection (cloud only)
-  rgPath?: string;       // Path to ripgrep (set by ensureSandboxTools)
-}
-```
-
-**Note**: `createVercelSandbox()` and `createE2BSandbox()` are async and auto-setup ripgrep:
-```typescript
-const sandbox = await createE2BSandbox({ apiKey: '...' });
-// rgPath is already set, Grep tool works immediately
-```
-
-#### 3. Tool Composition
-Tools assembled into a ToolSet for Vercel AI SDK:
-```typescript
-const { tools } = await createAgentTools(sandbox, {
-  tools: { Bash: { timeout: 30000 } },
-  webSearch: { apiKey: process.env.PARALLEL_API_KEY }
-});
-// Returns: { Bash, Read, Write, Edit, Glob, Grep, WebSearch }
-```
-
-#### 4. Middleware System
-Language models wrapped for cross-cutting concerns:
-```typescript
-const model = wrapLanguageModel({
-  model: anthropic('claude-sonnet-4-5'),
-  middleware: anthropicPromptCacheMiddleware
-});
-```
-
-#### 5. Configuration as Code
-Zod schemas define and validate all tool inputs:
-```typescript
-const bashInputSchema = z.object({
-  command: z.string(),
-  description: z.string().nullable(),
-  timeout: z.number().nullable()
-});
-```
-
-#### 6. Nullable Types for AI Provider Compatibility
-
-All optional tool parameters use `.nullable()` instead of `.optional()` for OpenAI structured outputs compatibility.
-
-**Why `.nullable()` instead of `.optional()`:**
-- OpenAI structured outputs require all properties in the `required` array
-- `.optional()` removes properties from `required` (breaks OpenAI)
-- `.nullable()` keeps properties in `required` but allows `null` values
-- Works with both OpenAI and Anthropic models
-
-**Pattern for handling nullable values:**
-```typescript
-// Zod schema uses .nullable()
-const schema = z.object({
-  timeout: z.number().nullable(),
-  replace_all: z.boolean().nullable(),
-});
-
-// In execute function, use ?? for defaults
-// NOTE: Destructuring defaults (= value) only work with undefined, NOT null
-const { timeout, replace_all: rawReplaceAll } = input;
-const effectiveTimeout = timeout ?? 120000;
-const replaceAll = rawReplaceAll ?? false;
-```
-
-**Type conventions:**
-- Zod schemas: `.nullable()` → produces `T | null`
-- Exported interfaces: `T | null` (e.g., `description: string | null`)
-- Internal functions: `T | null` for parameters that accept nullable values
-
-#### 7. Tool Result Caching
-Optional caching for tool execution results:
-```typescript
-// Enable with defaults (LRU, 5min TTL)
-const { tools } = await createAgentTools(sandbox, { cache: true });
-
-// Per-tool control
-const { tools } = await createAgentTools(sandbox, {
-  cache: { Read: true, Glob: true, Grep: false }
-});
-
-// Standalone wrapper
-import { cached } from 'bashkit';
-const cachedTool = cached(myTool, 'MyTool', { ttl: 60000 });
-```
-
-**Default cached tools**: Read, Glob, Grep, WebFetch, WebSearch
-**Not cached by default**: Bash, Write, Edit (side effects)
-
-#### 8. Model Registry
-Fetch model info (pricing + context lengths) from a provider:
-```typescript
-// Standalone model info (no budget needed)
-const { tools, openRouterModels } = await createAgentTools(sandbox, {
-  modelRegistry: { provider: "openRouter" },
-});
-// openRouterModels: Map<string, ModelInfo> with pricing + contextLength
-
-// With budget tracking (recommended)
-const { tools, budget, openRouterModels } = await createAgentTools(sandbox, {
-  modelRegistry: { provider: "openRouter" },
-  budget: { maxUsd: 5.00 },
-});
-```
-
-The `modelRegistry` config fetches model data once and shares it with budget tracking, compaction, and any other consumer. When both `modelRegistry` and `budget` are set, only one fetch occurs.
-
-**Legacy support**: `budget.pricingProvider` still works but is deprecated in favor of `modelRegistry`:
-```typescript
-// Still works (deprecated)
-budget: { maxUsd: 5.00, pricingProvider: "openRouter" }
-// Preferred
-modelRegistry: { provider: "openRouter" }, budget: { maxUsd: 5.00 }
-```
-
-#### 9. Budget Tracking
-Cumulative cost tracking across agentic loop steps:
-```typescript
-// Via createAgentTools (recommended)
-const { tools, budget } = await createAgentTools(sandbox, {
-  modelRegistry: { provider: "openRouter" },
-  budget: { maxUsd: 5.00 },
-});
-
-// Standalone usage
-const models = await fetchOpenRouterModels();
-const pricing = new Map([...models].map(([k, v]) => [k, v.pricing]));
-const budget = createBudgetTracker(5.00, { openRouterPricing: pricing });
-```
-
-**Pricing sources** (checked in order):
-1. User-provided `modelPricing` overrides (highest priority)
-2. OpenRouter's free public API (auto-fetched via `modelRegistry`, cached 24h)
-
-**Model ID matching** (PostHog's 3-tier strategy):
-1. Exact match (case-insensitive)
-2. Longest contained match (model variant contains cost variant)
-3. Reverse containment (cost variant contains model variant)
-
-**Task tool integration**: When `budget` is set in `AgentConfig`, the budget tracker auto-wires into all Task tool sub-agents via `stopWhen` and `onStepFinish`.
-
-### Component Interactions
-
-```
-User → Vercel AI SDK → Tool (Bash/Read/Write/etc.)
-                          ↓
-                       Sandbox Interface
-                          ↓
-            ┌─────────────┼─────────────┐
-            ↓             ↓             ↓
-       LocalSandbox  VercelSandbox  E2BSandbox
-            ↓             ↓             ↓
-         Bun API    Firecracker VM   E2B Service
-```
-
----
-
-## File Map (Quick Reference)
-
-Each `src/` subfolder has an `AGENTS.md` with detailed file listings and guides. Key entry points:
-
-- **Configuration**: `/src/types.ts` (ToolConfig, AgentConfig, BudgetConfig, ModelRegistryConfig, DEFAULT_CONFIG)
-- **Main exports**: `/src/index.ts` (barrel file)
-- **Package config**: `/package.json`
-- **Examples**: `/examples/basic.ts`, `/examples/test-tools.ts`, `/examples/test-web-tools.ts`
-
----
-
-## Development Workflow
-
-### Build Commands
-
-```bash
-# IMPORTANT: Always run typecheck BEFORE build when making changes
-bun run typecheck
-
-# Build everything (JS bundle + TypeScript declarations)
-bun run build
-
-# Install dependencies
-bun install
-```
-
-**Workflow**: Always run `bun run typecheck` first to catch type errors before building. The build command does not fail on type errors during the JS bundling step.
-
-**Build Process**:
-1. Bun bundles TypeScript to ESM JavaScript (`dist/index.js`)
-2. TypeScript compiler generates `.d.ts` declarations
-3. All dependencies marked as external (no bundling of `ai`, `zod`, etc.)
-
-### Testing Changes
-
-**Unit tests** use Vitest (run via `bun run test`, NOT `bun test`):
-
-```bash
-# Run all tests
-bun run test
-
-# Run specific test file(s)
-bun run test tests/utils/budget-tracking.test.ts
-
-# Watch mode
-bun run test:watch
-```
-
-**Examples** serve as integration tests:
-
-```bash
-# Test tools directly (no AI, no API key needed)
-bun examples/test-tools.ts
-
-# Test web tools (requires PARALLEL_API_KEY)
-PARALLEL_API_KEY=xxx bun examples/test-web-tools.ts
-
-# Full agentic loop (requires ANTHROPIC_API_KEY)
-ANTHROPIC_API_KEY=xxx bun examples/basic.ts
-```
-
-### Local Development
-
-```typescript
-// Use LocalSandbox for fast iteration
-import { createLocalSandbox, createAgentTools } from './src';
-
-const sandbox = createLocalSandbox({ workingDirectory: '/tmp' });
-const { tools } = await createAgentTools(sandbox);
-
-// Test your changes...
-await tools.Bash.execute({
-  command: 'echo "Hello"',
-  description: 'Test command'
-}, { toolCallId: 'test', messages: [] });
-```
-
-**Pro Tips**:
-- LocalSandbox uses Bun APIs (fast, no network overhead)
-- Use VercelSandbox or E2BSandbox for testing production behavior
-- Check `examples/test-tools.ts` for tool API patterns
-
----
-
-## Common Implementation Tasks
-
-Each task has a detailed step-by-step guide in the relevant folder's `AGENTS.md`:
-
-| Task | Guide Location |
-|------|---------------|
-| Adding a new tool | `src/tools/AGENTS.md` → "Common Modifications" |
-| Implementing a new sandbox | `src/sandbox/AGENTS.md` → "Common Modifications" |
-| Adding middleware | `src/middleware/AGENTS.md` → "Common Modifications" |
-| Adding a cache backend | `src/cache/AGENTS.md` → "Common Modifications" |
-| Adding configuration options | Add types to `/src/types.ts`, use in tool factory via `config?.yourOption ?? default` |
-| Adding a skill source | `src/skills/AGENTS.md` → "Common Modifications" |
-| Setting up agent environments | `src/setup/AGENTS.md` → "Common Modifications" |
-
----
-
-## Code Conventions
-
-### Naming Conventions
-
-| Element | Convention | Examples |
-|---------|------------|----------|
-| Tool names | PascalCase | `Bash`, `Read`, `Write`, `WebSearch` |
-| Factory functions | `createX` prefix | `createBashTool`, `createLocalSandbox` |
-| Output types | `XOutput` suffix | `BashOutput`, `ReadOutput` |
-| Error types | `XError` suffix | `BashError`, `ReadError` |
-| Config types | `XConfig` suffix | `ToolConfig`, `AgentConfig` |
-| Files | kebab-case | `bash.ts`, `anthropic-cache.ts` |
-
-### Type Organization
-
-**Input Schemas** - Colocated with tool implementation:
-```typescript
-// In /src/tools/bash.ts
-const bashInputSchema = z.object({
-  command: z.string(),
-  description: z.string()
-});
-```
-
-**Output Types** - Exported from tool files:
-```typescript
-export interface BashOutput {
-  stdout: string;
-  stderr: string;
-  exit_code: number;
-}
-
-export interface BashError {
-  error: string;
-}
-```
-
-**Union Types** - Tools return `Output | Error`:
-```typescript
-execute: async (input): Promise<BashOutput | BashError> => {
-  // Implementation
-}
-```
-
-**Config Types** - Centralized in `/src/types.ts`:
-```typescript
-export type ToolConfig = { /* ... */ };
-export type AgentConfig = { /* ... */ };
-```
-
-### Error Handling
-
-**Pattern**: Return error objects, don't throw
-
-```typescript
-// ✅ Correct
-try {
-  const result = await sandbox.exec(command);
-  return { stdout: result.stdout };
-} catch (err) {
-  return { error: String(err) };
-}
-
-// ❌ Incorrect
-try {
-  const result = await sandbox.exec(command);
-  return { stdout: result.stdout };
-} catch (err) {
-  throw err; // Don't throw from tools
-}
-```
-
-**Exception**: Sandbox methods can throw (tools catch them)
-
-### Configuration Pattern
-
-**Accept optional config, merge with defaults**:
-
-```typescript
-export function createBashTool(sandbox: Sandbox, config?: ToolConfig) {
-  const timeout = config?.timeout ?? 120000;
-  const maxOutput = config?.maxOutputLength ?? 30000;
-
-  return tool({
-    execute: async (input) => {
-      // Use timeout, maxOutput
-    }
-  });
-}
-```
-
-**Optional features enabled by config presence**:
-
-```typescript
-// WebSearch only added if config provided
-if (config?.webSearch) {
-  tools.WebSearch = createWebSearchTool(config.webSearch);
-}
-```
-
----
-
-## Important Notes & Gotchas
-
-### Dependencies
-
-**Peer Dependencies** (required):
-- `ai` ^5.0.0 - Vercel AI SDK
-- `zod` ^4.1.8 - Schema validation
-
-**Optional Peer Dependencies**:
-- `@vercel/sandbox` ^1.0.0 - Vercel execution environment
-- `@e2b/code-interpreter` ^1.0.0 - E2B code execution
-- `parallel-web` ^1.0.0 - Web search/fetch operations
-
-**Why optional?** Users choose their execution environment:
-- LocalSandbox (no deps) for development
-- VercelSandbox (requires `@vercel/sandbox`) for production
-- E2BSandbox (requires `@e2b/code-interpreter`) for hosted execution
-
-**Build externals**: All dependencies marked as external to prevent bundling duplication.
-
-### Testing Strategy
-
-**Unit tests** via Vitest (`bun run test`):
-- `/tests/tools/` - Tool unit and integration tests
-- `/tests/utils/` - Utility function tests
-
-**Examples** as integration tests:
-- `/examples/test-tools.ts` - Direct tool API testing (no AI model needed)
-- `/examples/basic.ts` - Full agentic loop with Claude
-- `/examples/test-web-tools.ts` - Web tools demonstration
-
-**Before releases**:
-1. Run `bun run test` to verify all unit tests pass
-2. Run all examples to verify functionality
-3. Test each sandbox implementation
-4. Verify type generation (`bun run build`)
-
-### Breaking Changes to Avoid
-
-**Public APIs** (require major version bump):
-
-1. **Sandbox interface** (`/src/sandbox/interface.ts`)
-   - Adding methods breaks implementers
-   - Changing method signatures breaks all sandboxes
-
-2. **Tool input schemas**
-   - AI models rely on these
-   - Removing fields breaks existing prompts
-
-3. **Tool output types**
-   - Consumers depend on these shapes
-   - Removing fields breaks user code
-
-4. **Tool names**
-   - Used in AI prompts (e.g., "use the Bash tool")
-   - Renaming breaks prompt compatibility
-
-**Safe changes** (minor/patch versions):
-- Adding new optional config fields
-- Adding new tools
-- Adding new sandbox implementations
-- Internal refactoring
-- Bug fixes
-
-### Performance Considerations
-
-**Tool Result Caching**:
-```typescript
-// Enable caching for read-only tools
-const { tools } = await createAgentTools(sandbox, { cache: true });
-
-// Custom TTL and per-tool control
-const { tools } = await createAgentTools(sandbox, {
-  cache: {
-    ttl: 10 * 60 * 1000,  // 10 minutes
-    debug: true,          // Log cache hits/misses
-    Read: true,
-    Glob: true,
-    WebFetch: false,      // Disable for this tool
-  }
-});
-
-// Check cache stats
-const readTool = tools.Read as CachedTool;
-console.log(readTool.getStats());
-// { hits: 5, misses: 2, hitRate: 0.71, size: 2 }
-```
-Returns cached results for identical tool calls. Default TTL: 5 minutes.
-
-**Budget Tracking**:
-```typescript
-// Track cumulative cost and stop when budget exceeded
-const { tools, budget } = await createAgentTools(sandbox, {
-  modelRegistry: { provider: "openRouter" },
-  budget: { maxUsd: 5.00 },
-});
-
-const result = await generateText({
-  model,
-  tools,
-  stopWhen: [stepCountIs(50), budget.stopWhen],
-  onStepFinish: (step) => {
-    budget.onStepFinish(step);
-    console.log(budget.getStatus());
-    // { totalCostUsd: 0.12, maxUsd: 5, remainingUsd: 4.88, ... }
-  },
-});
-```
-Pricing auto-fetched from OpenRouter via `modelRegistry` (free API, cached 24h). Supports manual `modelPricing` overrides. Budget auto-wires into Task tool sub-agents.
-
-**Prompt Caching**:
-```typescript
-import { anthropicPromptCacheMiddleware } from 'bashkit';
-
-const model = wrapLanguageModel({
-  model: anthropic('claude-sonnet-4-5'),
-  middleware: anthropicPromptCacheMiddleware
-});
-```
-Reduces cost/latency for repeated prompts (3+ messages).
-
-**Message Pruning**:
-```typescript
-import { pruneMessages } from 'bashkit';
-
-const pruned = pruneMessages(messages, {
-  maxTokens: 100000,
-  protectRecentUserMessages: 3
-});
-```
-Keeps conversations within token limits.
-
-**Sandbox Choice**:
-- **LocalSandbox**: Fastest (Bun APIs), use for development
-- **VercelSandbox**: Production-ready, Firecracker isolation
-- **E2BSandbox**: Hosted, good for serverless environments
-
-**Timeout Configuration**:
-```typescript
-const { tools } = await createAgentTools(sandbox, {
-  defaultTimeout: 30000, // 30 seconds instead of 120s
-  tools: {
-    Bash: { timeout: 10000 } // Override per-tool
-  }
-});
-```
-
-### Security Notes
-
-**Bash Tool Risks**:
-- Executes arbitrary commands
-- Can access filesystem, network, system
-- Use `blockedCommands` to restrict dangerous operations
-
-**Configuration-Based Security**:
-
-```typescript
-const { tools } = await createAgentTools(sandbox, {
-  tools: {
-    Bash: {
-      blockedCommands: ['rm -rf', 'dd if=', 'curl'],
-      timeout: 10000
-    },
-    Read: {
-      allowedPaths: ['/workspace/**'] // Restrict file access
-    },
-    Write: {
-      maxFileSize: 1_000_000, // 1MB limit
-      allowedPaths: ['/workspace/**']
-    }
-  }
-});
-```
-
-**Best Practices**:
-- Always set timeouts to prevent hanging
-- Use allowedPaths for file operations
-- Block dangerous bash commands
-- Set file size limits
-- Run in sandboxed environments (Vercel/E2B) for production
-- Don't expose directly to untrusted users without additional controls
-
----
-
-## Additional Resources
-
-- **GitHub**: https://github.com/jbreite/bashkit
-- **npm**: `bashkit` (v0.1.0)
-- **Examples**: See `/examples/` directory
-- **Issues**: Report bugs on GitHub Issues
-
----
-
-*Last Updated*: 2026-01-22
-*For*: Claude Code and AI coding assistants
diff --git a/CLAUDE.md b/CLAUDE.md
new file mode 120000
index 0000000..47dc3e3
--- /dev/null
+++ b/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
\ No newline at end of file
diff --git a/README.md b/README.md
index 190af88..e5a8853 100644
--- a/README.md
+++ b/README.md
@@ -549,6 +549,96 @@ console.log({
 });
 ```
 
+## Context Layer
+
+bashkit ships a context layer that handles two concerns most agent loops end up reinventing:
+
+1. **Static system prompt assembly** — discover project docs (`AGENTS.md` / `CLAUDE.md`), collect environment info (cwd, shell, platform, git branch), and build tool guidance. Runs once at init so the system prompt stays stable for Anthropic prompt caching.
+2. **Dynamic per-step layers** — intercept every tool call with `beforeExecute` gates (plan mode, custom allow/deny) and `afterExecute` transforms (output truncation, redirection hints, optional disk stash). Compose into an AI SDK `prepareStep` with auto-compaction and context-status monitoring.
+
+### Building a System Prompt
+
+```typescript
+import { buildSystemContext, createLocalSandbox } from 'bashkit';
+
+const sandbox = createLocalSandbox({ cwd: process.cwd() });
+
+const context = await buildSystemContext(sandbox, {
+  instructions: true,       // walk up from cwd, load AGENTS.md / CLAUDE.md
+  environment: true,        // inject <environment_context> XML
+  toolGuidance: {
+    tools: ['Bash', 'Read', 'Write', 'Edit', 'Grep', 'Glob'],
+  },
+});
+
+// context.combined -> ready to drop into streamText({ system })
+// context.instructions / context.environment / context.toolGuidance -> individual sections
+// context.meta.instructionSources -> which files were discovered
+```
+
+Call this **once at init**. The output must stay stable across turns for prompt caching to work — never regenerate it mid-conversation.
+
+### Tool Execution Layers
+
+```typescript
+import {
+  applyContextLayers,
+  createExecutionPolicy,
+  createOutputPolicy,
+  createAgentTools,
+  createLocalSandbox,
+} from 'bashkit';
+
+const sandbox = createLocalSandbox({ cwd: '/tmp/workspace' });
+const { tools, planModeState } = await createAgentTools(sandbox, { planMode: true });
+
+const wrappedTools = applyContextLayers(tools, [
+  // Gate: block Bash/Write/Edit while plan mode is active
+  createExecutionPolicy(planModeState),
+
+  // Transform: truncate oversized results, inject redirection hints,
+  // optionally stash full output to disk
+  createOutputPolicy({
+    maxOutputLength: 30_000,
+    redirectionThreshold: 20_000,
+    stashOutput: {
+      sandbox,
+      tools: ['Bash', 'Grep'],  // only these get full output stashed
+    },
+  }),
+]);
+```
+
+Layers compose: `beforeExecute` gates run in order (the first rejection wins), and `afterExecute` transforms are piped in order. Custom layers just implement the `ContextLayer` interface — see `src/context/AGENTS.md` for the full contract.
+
+### prepareStep Composition
+
+```typescript
+import { createPrepareStep, MODEL_CONTEXT_LIMITS } from 'bashkit';
+
+const prepareStep = createPrepareStep({
+  compaction: {
+    maxTokens: MODEL_CONTEXT_LIMITS['claude-sonnet-4-5'],
+    summarizerModel: anthropic('claude-haiku-4'),
+    compactionThreshold: 0.85,
+  },
+  contextStatus: {
+    maxTokens: MODEL_CONTEXT_LIMITS['claude-sonnet-4-5'],
+  },
+  planModeState,   // injects a plan-mode hint as a user message
+});
+
+await streamText({
+  model,
+  tools: wrappedTools,
+  system: context.combined,  // from buildSystemContext
+  messages,
+  prepareStep,
+});
+```
+
+**Important**: `createPrepareStep` never touches `system` — it only modifies `messages`. That's load-bearing for Anthropic prompt caching. If you extend it via the `extend` callback, do not set `system` either.
+
 ## Agent Skills
 
 bashkit supports the [Agent Skills](https://agentskills.io) standard - an open format for giving agents new capabilities and expertise. Skills are folders containing a `SKILL.md` file with instructions that agents can load on-demand.
@@ -939,6 +1029,17 @@ Creates a set of agent tools bound to a sandbox instance.
 - `anthropicPromptCacheMiddleware` - Enable prompt caching for Anthropic models (AI SDK v6+)
 - `anthropicPromptCacheMiddlewareV2` - Enable prompt caching for Anthropic models (AI SDK v5)
 
+### Context Layer
+
+- `buildSystemContext(sandbox, config?)` - Assemble instructions + environment + tool guidance into a system prompt
+- `discoverInstructions(sandbox, config?)` - Walk up from cwd loading AGENTS.md / CLAUDE.md files
+- `collectEnvironment(sandbox, config?)` / `formatEnvironment(env)` - Capture and format cwd/shell/platform/git state
+- `buildToolGuidance(config)` - Generate one-line hints for registered tools
+- `withContext(tool, name, layers)` / `applyContextLayers(tools, layers)` - Wrap tools with gate + transform layers
+- `createExecutionPolicy(planModeState, config?)` - Plan-mode + custom gate `ContextLayer`
+- `createOutputPolicy(config?)` - Truncation + redirection hints + optional disk stash `ContextLayer`
+- `createPrepareStep(config)` - Compose compaction + context-status + plan-mode hints into an AI SDK `PrepareStepFunction`
+
 ## Future Roadmap
 
 The following features are planned for future releases:
diff --git a/src/context/AGENTS.md b/src/context/AGENTS.md
new file mode 100644
index 0000000..25135e5
--- /dev/null
+++ b/src/context/AGENTS.md
@@ -0,0 +1,137 @@
+# Context Module
+
+Builds the agent's runtime context: static system prompt assembly (instructions, environment, tool guidance) and dynamic per-step behavior (tool execution gating, output truncation, message-level hints). Bridges discovered project docs and sandbox state into prompt material while layering cross-cutting policies on top of any `ToolSet` without mutating individual tool definitions.
+
+## Files
+
+| File | Purpose |
+|------|---------|
+| `index.ts` | Barrel exports + `ContextLayer` interface, `withContext()`, `applyContextLayers()` |
+| `build-context.ts` | `buildSystemContext()` — assembles instructions + environment + tool guidance into one string |
+| `instructions.ts` | `discoverInstructions()` — walks CWD→root finding AGENTS.md/CLAUDE.md files |
+| `environment.ts` | `collectEnvironment()` + `formatEnvironment()` — cwd/shell/platform/date/git snapshot as XML |
+| `tool-guidance.ts` | `buildToolGuidance()` — one-line hint list keyed by registered tool names |
+| `execution-policy.ts` | `createExecutionPolicy()` — plan-mode + custom gate layer (`beforeExecute`) |
+| `output-policy.ts` | `createOutputPolicy()` — truncation + redirection hints + optional disk stash (`afterExecute`) |
+| `prepare-step.ts` | `createPrepareStep()` — composes compaction + context-status + plan-mode hints for AI SDK `prepareStep` |
+
+## Key Exports
+
+- `withContext(tool, toolName, layers)` -- Wraps a single `Tool` preserving generics; first `beforeExecute` rejection wins, `afterExecute` transforms pipe
+- `applyContextLayers(tools, layers)` -- Wraps an entire `ToolSet` (no-op if `layers.length === 0`)
+- `ContextLayer` -- `{ beforeExecute?, afterExecute? }` with `Record<string, unknown>` params (tool-agnostic)
+- `buildSystemContext(sandbox, config?)` -- Returns `{ instructions, environment, toolGuidance, combined, meta }`; designed to be called **once at init** for prompt-cache stability
+- `createExecutionPolicy(planModeState, config?)` -- Blocks `["Bash", "Write", "Edit"]` by default when plan mode is active
+- `createOutputPolicy(config?)` -- Defaults: `maxOutputLength: 30000`, `redirectionThreshold: 20000`, uses `middleTruncate`
+- `createPrepareStep(config)` -- Returns a `PrepareStepFunction<ToolSet>`; **never touches `system`** (prompt cache)
+- `discoverInstructions`, `collectEnvironment`, `formatEnvironment`, `buildToolGuidance` -- individual section builders
+
+## Architecture
+
+**Two distinct concerns, one module**:
+
+1. **Static prompt assembly** (`build-context.ts` → `instructions.ts` + `environment.ts` + `tool-guidance.ts`): runs once at agent init. Output goes into `streamText({ system })` and must stay stable across turns for Anthropic prompt caching.
+2. **Dynamic per-step layers** (`index.ts` wrappers → `execution-policy.ts` + `output-policy.ts`, plus `prepare-step.ts`): intercept every tool call or message list. Can be async, can short-circuit, can transform.
+
+**Internal dependency graph**:
+```
+index.ts ──────── re-exports everything + ContextLayer/withContext/applyContextLayers
+  ↑
+build-context.ts ──→ instructions.ts
+                 ──→ environment.ts
+                 ──→ tool-guidance.ts
+execution-policy.ts ──→ index.ts (ContextLayer type only)
+output-policy.ts    ──→ index.ts (ContextLayer type only) + ../utils/helpers (middleTruncate)
+prepare-step.ts     ──→ ../utils/compact-conversation + ../utils/context-status
+```
+
+**Layer composition** (`withContext` in `index.ts:74`): `beforeExecute` hooks run sequentially, first `{ error }` return short-circuits; `afterExecute` hooks pipe the result through in order. Tools without `execute` (client-rendered / deferred) pass through untouched.
+
+**Instruction discovery** (`instructions.ts:52`): walks upward from `sandbox.workingDirectory` collecting dirs until a root marker (`.git` by default) is hit — or, if none is found, until the path has no parent left — then reverses so root-level docs come first and local docs win on specificity. Per dir, first matching filename from `filenames` wins. Global instruction file (if configured) is prepended. 32KB cap on the concatenated text — only the last source is marked `truncated`.
+
+**Output policy** (`output-policy.ts:189`): extracts text from common result shapes (`stdout`, `content`, or serialized JSON), and if over `redirectionThreshold` optionally stashes full text to disk (`/tmp/.bashkit/tool-output` by default), truncates via `middleTruncate`, then injects `_hint` back into the result. Hint priority: `buildHint` callback → `hints` map → built-in per-tool hints → generic fallback.
+
+**prepareStep pipeline** (`prepare-step.ts:50`): (1) run auto-compaction if configured, (2) check context status → inject `<context_status>` as a user message, (3) inject `<plan_mode>` user message when plan mode active, (4) let consumer `extend` callback augment. **Never sets `system`** — comment at `prepare-step.ts:39` is load-bearing.
+
+## Design Patterns
+
+- **Chain of Responsibility** — `withContext` runs layers in order until one rejects (before) / all transform (after)
+- **Strategy** — `OutputPolicyConfig.truncate` and `buildHint` are pluggable
+- **Decorator** — `withContext` wraps tools without changing their `Tool<PARAMETERS, RESULT>` generic (type-preserving)
+- **Gate + Transform split** — `beforeExecute` returns `{ error }` to reject, `afterExecute` returns a new result to transform. Keeps gates pure and transforms side-effect-aware
+- **Stable-system / dynamic-messages** — system prompt is frozen at init (cache friendly), per-step dynamism lives in `messages` via `prepareStep`
+
+## Integration Points
+
+**Depends on**:
+- `../sandbox/interface` — `Sandbox` type for `discoverInstructions`/`collectEnvironment`/`stashOutput`
+- `../tools/enter-plan-mode` — `PlanModeState` type (execution-policy + prepare-step)
+- `../utils/helpers` — `middleTruncate` (output-policy)
+- `../utils/compact-conversation`, `../utils/context-status` — prepare-step pipeline
+- `ai` — `Tool`, `ToolSet`, `ModelMessage`, `PrepareStepFunction`, `PrepareStepResult`
+
+**Used by**:
+- `../tools/index.ts` — `createAgentTools()` imports `withContext`, `applyContextLayers`, `createExecutionPolicy`, `createOutputPolicy` to auto-wire layers from `AgentConfig`
+- `../types.ts` — imports `ContextLayer`, `ExecutionPolicyConfig`, `OutputPolicyConfig` to type `AgentConfig.context`
+
+**Exported from** `src/index.ts:130`: all types (`ContextLayer`, `ExecutionPolicyConfig`, `OutputPolicyConfig`, `StashOutputConfig`, `InstructionDiscoveryConfig`, `DiscoveredInstructions`, `EnvironmentContext`, `EnvironmentContextConfig`, `ToolGuidanceConfig`, `SystemContextConfig`, `SystemContext`, `PrepareStepConfig`) and all functions (`withContext`, `applyContextLayers`, `createExecutionPolicy`, `createOutputPolicy`, `discoverInstructions`, `collectEnvironment`, `formatEnvironment`, `buildToolGuidance`, `buildSystemContext`, `createPrepareStep`).
+
+## Common Modifications
+
+### Add a new context layer
+1. Create `my-layer.ts` exporting a factory that returns `ContextLayer` (from `./index`)
+2. Implement `beforeExecute` (gate) and/or `afterExecute` (transform); remember `afterExecute` must return the result
+3. Re-export from `index.ts`
+4. If it should auto-wire, add to `AgentConfig` in `../types.ts` and hook into `../tools/index.ts` `createAgentTools`
+
+**Gotchas**: params/result are `Record<string, unknown>` — layers are tool-agnostic. Don't assume tool-specific fields without a `toolName` check. `beforeExecute` order matters (first rejection wins); `afterExecute` order matters (pipe).
+
+### Add a new system prompt section
+1. Create builder in a new file with `XConfig` + `buildX(config)` (sync) or `collectX(sandbox, config)` + `formatX(data)` (async, needs sandbox)
+2. Add to `SystemContextConfig` in `build-context.ts:15`
+3. Wire into `buildSystemContext` parallel `Promise.all` and append to `sections` array
+4. Export from `index.ts`
+
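+**Gotchas**: a section is computed once at init and then frozen (prompt cache), so it must not depend on state that changes during a session. The environment section's date and git fields are an init-time snapshot and are never refreshed — anything that must stay current (live git status, message counts) belongs in `prepare-step.ts` as user messages, never in a system section.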
+
+### Change output truncation behavior for a tool
+1. If globally: pass custom `truncate` to `createOutputPolicy`
+2. If per-tool: add entry to `BUILT_IN_HINTS` in `output-policy.ts:58` or pass `hints`/`buildHint` via config
+3. If shape-specific: extend `extractText` in `output-policy.ts:127` to recognize new result shapes
+
+**Gotchas**: `excludeTools` skips truncation entirely. Truncated output is re-injected into the original field (`stdout` or `content`) with the other fields kept; for JSON-serialized results, the whole result is replaced by `{ _truncated, _hint }`, so the original fields are no longer present.
+
+### Add a new instruction source
+1. Extend `DiscoveredInstructions["sources"][].scope` union in `instructions.ts:19`
+2. Add collection logic in `discoverInstructions` before or after the upward walk (see `globalPath` at `instructions.ts:102`)
+3. Decide merge order (current convention: most specific last)
+
+**Gotchas**: 32KB cap applies to the concatenated output; new sources compete for the same budget. Only the last source gets `truncated: true` flagged.
+
+### Extend `prepareStep` without breaking built-ins
+Pass a `PrepareStepFunction` as `config.extend` to `createPrepareStep`. It runs last and its return is merged into the final `PrepareStepResult`. Do not set `system` in your extension — it breaks Anthropic prompt caching.
+
+## Testing
+
+**Test files** in `tests/context/`:
+- `build-context.test.ts` (434 lines) — system prompt assembly, section enabling/disabling, combined output
+- `execution-policy.test.ts` (145 lines) — plan-mode blocking, custom predicates
+- `output-policy.test.ts` (520 lines) — truncation, hints, stash-to-disk, custom builders
+- `prepare-step.test.ts` (196 lines) — compaction, context-status injection, plan-mode hint, extend composition
+- `with-context.test.ts` (346 lines) — layer wrapping, gate short-circuit, transform pipe, no-execute passthrough
+- `parallel.test.ts` (175 lines) — layer isolation under concurrent tool calls
+- `integration.test.ts` (273 lines) — end-to-end with real `ToolSet` + sandbox
+
+**Run tests**:
+```bash
+bun run test tests/context/
+```
+
+**Gaps**: no dedicated tests for `instructions.ts` or `environment.ts`/`tool-guidance.ts` as units (coverage is indirect through `build-context.test.ts`). Stash-output disk path collision under extreme parallelism is covered by the `stashCounter` but not explicitly tested beyond `parallel.test.ts`.
+
+## Breaking Change Risks
+
+- `ContextLayer` signature — changing `beforeExecute`/`afterExecute` params or return types breaks every registered layer downstream
+- `SystemContext` shape — consumers read `combined` + individual sections; removing fields breaks prompt builders
+- `buildSystemContext` cache discipline — any change that makes its output non-deterministic across calls silently breaks Anthropic prompt caching for every consumer
+- `prepare-step.ts` touching `system` — guarded by comment, but worth a code review flag on any edit
diff --git a/src/context/CLAUDE.md b/src/context/CLAUDE.md
new file mode 120000
index 0000000..47dc3e3
--- /dev/null
+++ b/src/context/CLAUDE.md
@@ -0,0 +1 @@
+AGENTS.md
\ No newline at end of file
diff --git a/src/context/build-context.ts b/src/context/build-context.ts
new file mode 100644
index 0000000..06c6256
--- /dev/null
+++ b/src/context/build-context.ts
@@ -0,0 +1,94 @@
+import type { Sandbox } from "../sandbox/interface";
+import {
+  discoverInstructions,
+  type InstructionDiscoveryConfig,
+  type DiscoveredInstructions,
+} from "./instructions";
+import {
+  collectEnvironment,
+  formatEnvironment,
+  type EnvironmentContext,
+  type EnvironmentContextConfig,
+} from "./environment";
+import { buildToolGuidance, type ToolGuidanceConfig } from "./tool-guidance";
+
+export interface SystemContextConfig {
+  /** Instruction file discovery. Omitted/false: skipped. true: defaults. Object: custom config. */
+  instructions?: boolean | InstructionDiscoveryConfig;
+  /** Environment context. Omitted/false: skipped. true: defaults. Object: custom config. */
+  environment?: boolean | EnvironmentContextConfig;
+  /** Tool guidance config. The tool guidance section is only built when this is provided. */
+  toolGuidance?: ToolGuidanceConfig;
+}
+
+export interface SystemContext {
+  /** Instruction text wrapped in an <INSTRUCTIONS> block (null if not found or disabled) */
+  instructions: string | null;
+  /** Formatted environment context (null if disabled) */
+  environment: string | null;
+  /** Tool guidance text (null when no toolGuidance config was given) */
+  toolGuidance: string | null;
+  /** Non-empty sections joined with double newlines ("" if none). Ready for prompt injection. */
+  combined: string;
+  /** Metadata about what was discovered; a field is undefined when its section was skipped */
+  meta: {
+    instructionSources?: DiscoveredInstructions["sources"];
+    environmentContext?: EnvironmentContext;
+  };
+}
+
+/**
+ * Build the full system context for an agent session.
+ * Each section is independently available or can be used via `combined`.
+ *
+ * Sections are opt-in: `instructions` and `environment` run only when set to
+ * `true` or a config object; tool guidance only when `toolGuidance` is given.
+ * `environment.custom` fields are forwarded to the rendered environment block.
+ * Designed to be called once at init (system prompt is static for cache stability).
+ */
+export async function buildSystemContext(
+  sandbox: Sandbox,
+  config?: SystemContextConfig,
+): Promise<SystemContext> {
+  // `true` means "enabled with defaults", so only objects carry real settings
+  const instructionsOpt = config?.instructions;
+  const instructionsConfig =
+    typeof instructionsOpt === "object" ? instructionsOpt : undefined;
+  const environmentOpt = config?.environment;
+  const environmentConfig =
+    typeof environmentOpt === "object" ? environmentOpt : undefined;
+
+  // Run discovery and environment collection in parallel
+  const [instructions, env] = await Promise.all([
+    instructionsOpt ? discoverInstructions(sandbox, instructionsConfig) : null,
+    environmentOpt ? collectEnvironment(sandbox, environmentConfig) : null,
+  ]);
+
+  const instructionsText = instructions
+    ? `# Project Instructions\n<INSTRUCTIONS>\n${instructions.text}\n</INSTRUCTIONS>`
+    : null;
+
+  // Forward `custom` so configured fields actually reach the rendered block
+  const environmentText = env
+    ? formatEnvironment(env, environmentConfig?.custom)
+    : null;
+
+  const toolGuidanceText = config?.toolGuidance
+    ? buildToolGuidance(config.toolGuidance)
+    : null;
+
+  const sections = [instructionsText, environmentText, toolGuidanceText]
+    .filter(Boolean)
+    .join("\n\n");
+
+  return {
+    instructions: instructionsText,
+    environment: environmentText,
+    toolGuidance: toolGuidanceText,
+    combined: sections,
+    meta: {
+      instructionSources: instructions?.sources,
+      environmentContext: env ?? undefined,
+    },
+  };
+}
diff --git a/src/context/environment.ts b/src/context/environment.ts
new file mode 100644
index 0000000..7fb5795
--- /dev/null
+++ b/src/context/environment.ts
@@ -0,0 +1,91 @@
+import type { Sandbox } from "../sandbox/interface";
+
+export interface EnvironmentContext {
+  cwd: string; // sandbox.workingDirectory, or "/tmp" when the sandbox has none
+  shell: string; // output of `echo $SHELL` in the sandbox
+  platform: string; // process.platform of the running process, or "unknown"
+  date: string; // YYYY-MM-DD, taken from the UTC ISO timestamp
+  timezone?: string; // output of `date +%Z` in the sandbox
+  gitBranch?: string; // output of `git branch --show-current`
+  gitStatus?: string; // changed file count (`git status --porcelain | wc -l`)
+}
+
+export interface EnvironmentContextConfig {
+  /** Include git branch and changed-file count. Default: true (only `false` disables) */
+  git?: boolean;
+  /** Include timezone. Default: true (only `false` disables) */
+  timezone?: boolean;
+  /** Custom fields; formatEnvironment renders each as <key>value</key>, inserted verbatim */
+  custom?: Record<string, string>;
+}
+
+/**
+ * Collect an environment snapshot (cwd, shell, platform, date, git) from the sandbox.
+ * Every probe is best-effort: a failing command leaves its field at the fallback.
+ *
+ * Values reflect the moment of the call: `buildSystemContext` takes one
+ * snapshot at init, so collect again only when fresher state is needed.
+ */
+export async function collectEnvironment(
+  sandbox: Sandbox,
+  config?: EnvironmentContextConfig,
+): Promise<EnvironmentContext> {
+  const cwd =
+    (sandbox as { workingDirectory?: string }).workingDirectory ?? "/tmp";
+
+  // A rejected exec (timeout, missing binary) resolves to null instead of throwing
+  const probe = (command: string) =>
+    sandbox.exec(command, { timeout: 5000 }).catch(() => null);
+  const includeGit = config?.git !== false;
+  const includeTimezone = config?.timezone !== false;
+
+  // Probes are independent, so run them concurrently
+  const [shellResult, gitBranch, gitStatus, tz] = await Promise.all([
+    probe("echo $SHELL"),
+    includeGit ? probe("git branch --show-current 2>/dev/null") : null,
+    includeGit ? probe("git status --porcelain 2>/dev/null | wc -l") : null,
+    includeTimezone ? probe("date +%Z 2>/dev/null") : null,
+  ]);
+
+  // `||` (not `??`) so blank output also falls back, e.g. when $SHELL is unset
+  const clean = (output?: string | null) => output?.trim() || undefined;
+
+  return {
+    cwd,
+    shell: clean(shellResult?.stdout) ?? "unknown",
+    platform: typeof process !== "undefined" ? process.platform : "unknown",
+    // UTC calendar date, taken from the ISO timestamp
+    date: new Date().toISOString().split("T")[0],
+    timezone: clean(tz?.stdout),
+    gitBranch: clean(gitBranch?.stdout),
+    gitStatus: clean(gitStatus?.stdout),
+  };
+}
+
+/**
+ * Render an environment snapshot as XML for prompt injection, following
+ * Codex's <environment_context> format. Values are inserted verbatim.
+ */
+export function formatEnvironment(
+  env: EnvironmentContext,
+  custom?: Record<string, string>,
+): string {
+  const tag = (name: string, value: string) => `  <${name}>${value}</${name}>`;
+  // Optional fields are skipped when unset or empty
+  const optional: Array<[string, string | undefined]> = [
+    ["timezone", env.timezone],
+    ["git_branch", env.gitBranch],
+    ["git_changed_files", env.gitStatus],
+  ];
+
+  return [
+    "<environment_context>",
+    tag("cwd", env.cwd),
+    tag("shell", env.shell),
+    tag("platform", env.platform),
+    tag("date", env.date),
+    ...optional.flatMap(([name, value]) => (value ? [tag(name, value)] : [])),
+    ...Object.entries(custom ?? {}).map(([key, value]) => tag(key, value)),
+    "</environment_context>",
+  ].join("\n");
+}
diff --git a/src/context/execution-policy.ts b/src/context/execution-policy.ts
new file mode 100644
index 0000000..8a40b95
--- /dev/null
+++ b/src/context/execution-policy.ts
@@ -0,0 +1,49 @@
+import type { PlanModeState } from "../tools/enter-plan-mode";
+import type { ContextLayer } from "./index";
+
+export interface ExecutionPolicyConfig {
+  /** Tools blocked while plan mode is active. Replaces (not extends) the default ["Bash", "Write", "Edit"] */
+  planModeBlockedTools?: string[];
+  /** Custom predicate, checked after the plan-mode gate. Return a non-empty error string to reject. */
+  shouldBlock?: (
+    toolName: string,
+    params: Record<string, unknown>,
+  ) => string | undefined;
+}
+
+const DEFAULT_PLAN_MODE_BLOCKED = ["Bash", "Write", "Edit"];
+
+/**
+ * Build a `ContextLayer` whose `beforeExecute` gate rejects tool calls.
+ *
+ * Two checks run in order: the plan-mode gate (tool is in the blocked set and
+ * `planModeState.isActive` is true at call time), then `config.shouldBlock`.
+ * Rejected calls resolve to `{ error }` instead of executing; tools stay
+ * registered (prompt cache stable) — only execution is gated.
+ */
+export function createExecutionPolicy(
+  planModeState: PlanModeState,
+  config?: ExecutionPolicyConfig,
+): ContextLayer {
+  // Built once; membership is then checked on every tool call
+  const planModeBlocked = new Set(
+    config?.planModeBlockedTools ?? DEFAULT_PLAN_MODE_BLOCKED,
+  );
+
+  const beforeExecute: ContextLayer["beforeExecute"] = (toolName, params) => {
+    // Plan mode is read on every call, so toggling it takes effect immediately
+    const blockedByPlanMode =
+      planModeState.isActive && planModeBlocked.has(toolName);
+    if (blockedByPlanMode) {
+      return {
+        error: `${toolName} is not available in plan mode. Use read-only tools (Read, Grep, Glob) to gather information, then call ExitPlanMode when your plan is ready.`,
+      };
+    }
+
+    // A truthy string from the custom predicate becomes the rejection message
+    const reason = config?.shouldBlock?.(toolName, params);
+    return reason ? { error: reason } : undefined;
+  };
+
+  return { beforeExecute };
+}
diff --git a/src/context/index.ts b/src/context/index.ts
new file mode 100644
index 0000000..c8c954c
--- /dev/null
+++ b/src/context/index.ts
@@ -0,0 +1,128 @@
+import type { Tool, ToolSet } from "ai";
+
+// Re-export all context module types and functions
+export type { ExecutionPolicyConfig } from "./execution-policy";
+export { createExecutionPolicy } from "./execution-policy";
+
+export type {
+  OutputPolicyConfig,
+  StashOutputConfig,
+} from "./output-policy";
+export { createOutputPolicy } from "./output-policy";
+
+export type {
+  InstructionDiscoveryConfig,
+  DiscoveredInstructions,
+} from "./instructions";
+export { discoverInstructions } from "./instructions";
+
+export type {
+  EnvironmentContext,
+  EnvironmentContextConfig,
+} from "./environment";
+export { collectEnvironment, formatEnvironment } from "./environment";
+
+export type { ToolGuidanceConfig } from "./tool-guidance";
+export { buildToolGuidance } from "./tool-guidance";
+
+export type {
+  SystemContextConfig,
+  SystemContext,
+} from "./build-context";
+export { buildSystemContext } from "./build-context";
+
+export type { PrepareStepConfig } from "./prepare-step";
+export { createPrepareStep } from "./prepare-step";
+
+/**
+ * Context layer that intercepts tool execution.
+ * Params/result are typed as Record<string, unknown> because layers run for
+ * every tool (not tool-specific); values are cast, not validated at runtime.
+ */
+export interface ContextLayer {
+  /**
+   * Called before tool.execute(). Return `{ error }` to block execution; that
+   * object is then returned as the tool result. Return undefined to proceed.
+   */
+  beforeExecute?: (
+    toolName: string,
+    params: Record<string, unknown>,
+  ) => Promise<{ error: string } | undefined> | { error: string } | undefined;
+
+  /**
+   * Called after tool.execute() with the result left by earlier layers (skipped
+   * when a gate rejected). Return it as-is or transformed (e.g., truncated).
+   */
+  afterExecute?: (
+    toolName: string,
+    params: Record<string, unknown>,
+    result: Record<string, unknown>,
+  ) => Promise<Record<string, unknown>> | Record<string, unknown>;
+}
+
+/**
+ * Decorate one tool so every call passes through the given context layers.
+ * The return type is the input type `T`, so tool generics survive wrapping.
+ *
+ * Gates (`beforeExecute`) run in layer order and the first rejection is
+ * returned as the tool result; transforms (`afterExecute`) are piped in order.
+ *
+ * @param tool - Tool to wrap; returned unchanged when it has no `execute`
+ * @param toolName - Name reported to each layer hook
+ * @param layers - Layers applied in array order
+ * @returns A copy of `tool` whose `execute` runs the layers around the original
+ */
+export function withContext<T extends Tool>(
+  tool: T,
+  toolName: string,
+  layers: ContextLayer[],
+): T {
+  const { execute: innerExecute } = tool;
+  // Tools without execute have nothing to wrap and pass through untouched
+  if (!innerExecute) return tool;
+
+  type ExecuteArgs = Parameters<NonNullable<T["execute"]>>;
+
+  const execute = async (input: ExecuteArgs[0], options: ExecuteArgs[1]) => {
+    const record = input as Record<string, unknown>;
+
+    // Gate phase: the first layer to return a rejection ends the call
+    for (const layer of layers) {
+      const rejection = layer.beforeExecute
+        ? await layer.beforeExecute(toolName, record)
+        : undefined;
+      if (rejection) return rejection;
+    }
+
+    // Transform phase: each layer receives the previous layer's output
+    const raw = await innerExecute(input, options);
+    let output = raw as Record<string, unknown>;
+    for (const layer of layers) {
+      if (!layer.afterExecute) continue;
+      output = await layer.afterExecute(toolName, record, output);
+    }
+    return output;
+  };
+
+  return {
+    ...tool,
+    execute,
+  } as T;
+}
+
+/**
+ * Wrap every tool in a ToolSet with the same context layers.
+ * With no layers the input object itself is returned; otherwise a shallow copy.
+ */
+export function applyContextLayers<T extends ToolSet>(
+  tools: T,
+  layers: ContextLayer[],
+): T {
+  if (layers.length < 1) return tools;
+
+  const copy: Record<string, Tool> = { ...tools };
+  for (const [toolName, original] of Object.entries(copy)) {
+    copy[toolName] = withContext(original, toolName, layers);
+  }
+  return copy as T;
+}
diff --git a/src/context/instructions.ts b/src/context/instructions.ts
new file mode 100644
index 0000000..d9b057e
--- /dev/null
+++ b/src/context/instructions.ts
@@ -0,0 +1,137 @@
+import type { Sandbox } from "../sandbox/interface";
+
+export interface InstructionDiscoveryConfig {
+  /** Starting directory for upward search. Defaults to sandbox.workingDirectory, then "/tmp". */
+  cwd?: string;
+  /** Filenames to search for, in priority order (first match per directory is loaded). Default: ["AGENTS.md", "CLAUDE.md"] */
+  filenames?: string[];
+  /** Markers that indicate project root; the upward walk stops there. Default: [".git"] */
+  rootMarkers?: string[];
+  /** Max length of the combined text, compared against string length. Default: 32768 (32KB, matches Codex) */
+  maxBytes?: number;
+  /** Global instruction file path, placed before all discovered files. Default: none */
+  globalPath?: string;
+}
+
+export interface DiscoveredInstructions {
+  /** Combined instruction text (sections joined by a "--- project-doc ---" separator) */
+  text: string;
+  /** Files that were found and loaded, in the same order as their text appears in `text` */
+  sources: Array<{
+    path: string;
+    scope: "global" | "project" | "local"; // "project" = root-marker dir, unless it is also the start dir
+    bytes: number; // content.length of the file as read (string length)
+    truncated: boolean; // set on the last source only, when the combined text hit the cap
+  }>;
+}
+
+/**
+ * Discover and merge instruction files (AGENTS.md / CLAUDE.md) by walking
+ * from CWD up to the project root. Returns null when no file was loaded.
+ *
+ * Search order per directory: first matching filename wins.
+ * Merge order: global → project root → ... → CWD (most specific last).
+ */
+export async function discoverInstructions(
+  sandbox: Sandbox,
+  config?: InstructionDiscoveryConfig,
+): Promise<DiscoveredInstructions | null> {
+  const filenames = config?.filenames ?? ["AGENTS.md", "CLAUDE.md"];
+  const rootMarkers = config?.rootMarkers ?? [".git"];
+  const maxBytes = config?.maxBytes ?? 32768;
+  const cwd =
+    config?.cwd ??
+    (sandbox as { workingDirectory?: string }).workingDirectory ??
+    "/tmp";
+
+  const sources: DiscoveredInstructions["sources"] = [];
+  const sections: string[] = [];
+
+  // 1. Walk upward from CWD, recording each directory until a root marker is found
+  const dirsToSearch: string[] = [];
+  let dir = cwd;
+  let projectRoot: string | null = null;
+
+  while (true) {
+    dirsToSearch.push(dir);
+
+    // Any configured marker present here makes this directory the project root
+    for (const marker of rootMarkers) {
+      const markerPath = `${dir}/${marker}`;
+      if (await sandbox.fileExists(markerPath)) {
+        projectRoot = dir;
+      }
+    }
+
+    if (projectRoot) break;
+
+    // Move up by stripping the last path segment ("/a" becomes "")
+    const parent = dir.replace(/\/[^/]+$/, "");
+    if (parent === dir) break; // no segment left to strip: stop walking
+    dir = parent;
+  }
+
+  // 2. Collect instruction files from root down to CWD (most specific last)
+  const orderedDirs = dirsToSearch.reverse(); // root first, CWD last
+
+  for (const searchDir of orderedDirs) {
+    for (const filename of filenames) {
+      const filePath = `${searchDir}/${filename}`;
+      try {
+        if (await sandbox.fileExists(filePath)) {
+          const content = await sandbox.readFile(filePath);
+          const scope =
+            searchDir === cwd
+              ? "local"
+              : searchDir === projectRoot
+                ? "project"
+                : "local";
+          sections.push(content);
+          sources.push({
+            path: filePath,
+            scope,
+            bytes: content.length,
+            truncated: false,
+          });
+          break; // first matching filename wins per directory
+        }
+      } catch {
+        // Skip files we can't read (the next filename in this directory is tried)
+      }
+    }
+  }
+
+  // 3. Prepend global instructions if configured
+  if (config?.globalPath) {
+    try {
+      if (await sandbox.fileExists(config.globalPath)) {
+        const content = await sandbox.readFile(config.globalPath);
+        sections.unshift(content);
+        sources.unshift({
+          path: config.globalPath,
+          scope: "global",
+          bytes: content.length,
+          truncated: false,
+        });
+      }
+    } catch {
+      // Skip if can't read
+    }
+  }
+
+  if (sections.length === 0) return null;
+
+  // 4. Concatenate with separator
+  let combined = sections.join("\n\n--- project-doc ---\n\n");
+
+  // 5. Truncate by string length (maxBytes is compared to combined.length)
+  if (combined.length > maxBytes) {
+    combined = combined.slice(0, maxBytes);
+    // Only the last source is flagged, even if the cut landed in an earlier one
+    if (sources.length > 0) {
+      sources[sources.length - 1].truncated = true;
+    }
+  }
+
+  return { text: combined, sources };
+}
diff --git a/src/context/output-policy.ts b/src/context/output-policy.ts
new file mode 100644
index 0000000..747f7cc
--- /dev/null
+++ b/src/context/output-policy.ts
@@ -0,0 +1,227 @@
+import type { Sandbox } from "../sandbox/interface";
+import { middleTruncate } from "../utils/helpers";
+import type { ContextLayer } from "./index";
+
+/**
+ * Configuration for stashing full output to disk before truncating.
+ */
+export interface StashOutputConfig {
+  /** Sandbox used to create the directory (exec) and write the file (writeFile) */
+  sandbox: Sandbox;
+  /** Base directory for default stash paths. Default: '/tmp/.bashkit/tool-output' */
+  dir?: string;
+  /** Tools that get disk stash. Others just truncate without saving. */
+  tools: string[];
+  /**
+   * Custom file path per tool, used as-is (not joined with `dir`). Return undefined
+   * for the default path. Receives the result object for tool-specific fields.
+   */
+  pathFor?: (
+    toolName: string,
+    params: Record<string, unknown>,
+    result: Record<string, unknown>,
+  ) => string | undefined;
+}
+
+export interface OutputPolicyConfig {
+  /** Max length handed to the truncate function. Default: 30000 */
+  maxOutputLength?: number;
+  /** Results longer than this many characters are truncated and get a `_hint`. Default: 20000 */
+  redirectionThreshold?: number;
+  /** Tools whose output is returned untouched (no truncation, hint, or stash) */
+  excludeTools?: string[];
+  /** Custom truncation function. Defaults to middleTruncate. */
+  truncate?: (text: string, maxLength: number) => string;
+  /** Simple per-tool hint overrides; an entry wins over the built-in hint for that tool. */
+  hints?: Record<string, string>;
+  /**
+   * Full control callback for building redirection hints.
+   * Receives the raw result object for extracting tool-specific fields.
+   * Return string to override, undefined to fall through to hints map / defaults.
+   */
+  buildHint?: (
+    toolName: string,
+    params: Record<string, unknown>,
+    originalLength: number,
+    result: Record<string, unknown>,
+  ) => string | undefined;
+  /** Opt-in disk stash for full output before truncating. */
+  stashOutput?: StashOutputConfig;
+}
+
+/** Approximate a line count assuming 80 characters per line; never below 1 */
+function estimateLines(charCount: number): number {
+  return Math.max(Math.round(charCount / 80), 1);
+}
+
+/** Built-in redirection hints keyed by tool name; each builder receives the untruncated length */
+const BUILT_IN_HINTS: Record<string, (originalLength: number) => string> = {
+  Bash: (len) =>
+    `Output truncated (${len} chars, ~${estimateLines(len)} lines). ` +
+    `To see specific parts, re-run with | head, | tail, or | grep, ` +
+    `or use Read to examine any output files.`,
+
+  Grep: (len) =>
+    `Results truncated (${len} chars). ` +
+    `Use head_limit and offset parameters to paginate, ` +
+    `or narrow your pattern/glob to reduce results.`,
+
+  Read: (len) =>
+    `File content truncated (${len} chars, ~${estimateLines(len)} lines). ` +
+    `Use offset and limit parameters to read specific sections.`,
+};
+
+/** Generic hint for tools that have no dedicated entry in the hint tables */
+function defaultHint(len: number): string {
+  const summary = `Output truncated (${len} chars).`;
+  const advice = "Use Read, Grep, or Bash with targeted commands to access specific parts.";
+  return `${summary} ${advice}`;
+}
+
+/**
+ * Pick the redirection hint for a truncated tool result.
+ *
+ * Sources are tried in order: `buildHint` callback (unless it returns
+ * undefined), `hints` map entry (if non-empty), built-in per-tool hint,
+ * generic fallback. A stashed file path, when present, is prepended.
+ * @returns The hint text, prefixed with the stash location when one was given
+ */
+function buildRedirectionHint(
+  toolName: string,
+  params: Record<string, unknown>,
+  originalLength: number,
+  result: Record<string, unknown>,
+  config?: OutputPolicyConfig,
+  stashedPath?: string,
+): string {
+  const selectHint = (): string => {
+    // 1. Custom buildHint callback
+    const fromCallback = config?.buildHint?.(
+      toolName,
+      params,
+      originalLength,
+      result,
+    );
+    if (fromCallback !== undefined) return fromCallback;
+
+    // 2. Custom hints map (an empty string falls through)
+    const fromMap = config?.hints?.[toolName];
+    if (fromMap) return fromMap;
+
+    // 3. Built-in per-tool hints
+    const builtIn = BUILT_IN_HINTS[toolName];
+    if (builtIn) return builtIn(originalLength);
+
+    // 4. Generic fallback
+    return defaultHint(originalLength);
+  };
+
+  const hint = selectHint();
+  return stashedPath ? `Full output saved to ${stashedPath}. ${hint}` : hint;
+}
+
+/**
+ * Extract text content from a tool result for truncation checking.
+ * Checks common shapes: { stdout }, { content }, or stringified JSON.
+ * Returns null for a null/undefined result (nothing to measure).
+ */
+function extractText(result: Record<string, unknown>): string | null {
+  // Tool results reach layers through a cast, so they may be null at runtime
+  if (result === null || result === undefined) return null;
+  if (typeof result.stdout === "string") return result.stdout;
+  if (typeof result.content === "string") return result.content;
+  return JSON.stringify(result); // structured results: measure serialized JSON
+}
+
+/**
+ * Build the truncated result: overwrite the field `extractText` read from
+ * (`stdout`, then `content`) and attach `_hint`; other fields are kept.
+ */
+function injectTruncatedOutput(
+  result: Record<string, unknown>,
+  truncated: string,
+  hint: string,
+): Record<string, unknown> {
+  // Same precedence as extractText, so the replaced field is the measured one
+  for (const field of ["stdout", "content"]) {
+    if (typeof result[field] === "string") {
+      return { ...result, [field]: truncated, _hint: hint };
+    }
+  }
+
+  // Serialized-JSON results cannot be patched in place: original fields are dropped
+  return { _truncated: truncated, _hint: hint };
+}
+
+/**
+ * Sequence number appended to default stash paths so parallel tool calls
+ * landing in the same millisecond still get distinct file names.
+ * Module-level, so it is shared by every output policy in the process.
+ */
+let stashCounter = 0;
+
+/** Default stash path: `<dir>/<toolName>-<epoch ms>-<sequence>.txt` */
+function defaultStashPath(dir: string, toolName: string): string {
+  const timestamp = Date.now();
+  const sequence = stashCounter++;
+  return `${dir}/${toolName}-${timestamp}-${sequence}.txt`;
+}
+
+/**
+ * Create an output policy context layer that handles truncation
+ * and injects redirection hints for tool results.
+ *
+ * Results longer than `redirectionThreshold` go through `truncate` with
+ * `maxOutputLength` and get a `_hint`; `stashOutput` tools are saved to disk first.
+ */
+export function createOutputPolicy(config?: OutputPolicyConfig): ContextLayer {
+  const maxLen = config?.maxOutputLength ?? 30000;
+  const redirectAt = config?.redirectionThreshold ?? 20000;
+  const exclude = new Set(config?.excludeTools ?? []);
+  const truncateFn = config?.truncate ?? middleTruncate;
+  const stash = config?.stashOutput;
+  const stashDir = stash?.dir ?? "/tmp/.bashkit/tool-output";
+  const stashTools = stash ? new Set(stash.tools) : new Set<string>();
+  let stashDirCreated = false;
+
+  return {
+    afterExecute: async (toolName, params, result) => {
+      if (exclude.has(toolName)) return result;
+
+      const text = extractText(result);
+      if (!text || text.length <= redirectAt) return result;
+
+      // Stash full output to disk if configured for this tool
+      let stashedPath: string | undefined;
+      if (stash && stashTools.has(toolName)) {
+        stashedPath =
+          stash.pathFor?.(toolName, params, result) ??
+          defaultStashPath(stashDir, toolName);
+
+        // Ensure directory exists (default dir once, other dirs every time)
+        const dir = stashedPath.replace(/\/[^/]+$/, "");
+        if (!stashDirCreated || dir !== stashDir) {
+          // Single-quote the path: pathFor may build it from tool params
+          const quotedDir = `'${dir.replace(/'/g, "'\\''")}'`;
+          await stash.sandbox.exec(`mkdir -p -- ${quotedDir}`);
+          if (dir === stashDir) stashDirCreated = true;
+        }
+
+        await stash.sandbox.writeFile(stashedPath, text);
+      }
+
+      // Truncate, then build the hint that explains how to reach the rest
+      const truncated = truncateFn(text, maxLen);
+      const hint = buildRedirectionHint(
+        toolName,
+        params,
+        text.length,
+        result,
+        config,
+        stashedPath,
+      );
+
+      return injectTruncatedOutput(result, truncated, hint);
+    },
+  };
+}
diff --git a/src/context/prepare-step.ts b/src/context/prepare-step.ts
new file mode 100644
index 0000000..a6c3f65
--- /dev/null
+++ b/src/context/prepare-step.ts
@@ -0,0 +1,110 @@
+import type {
+  PrepareStepFunction,
+  PrepareStepResult,
+  ToolSet,
+  ModelMessage,
+} from "ai";
+import type { PlanModeState } from "../tools/enter-plan-mode";
+import {
+  createAutoCompaction,
+  type CompactConversationConfig,
+} from "../utils/compact-conversation";
+import {
+  getContextStatus,
+  type ContextStatusConfig,
+} from "../utils/context-status";
+
+export interface PrepareStepConfig {
+  /** Auto-compaction config. If set, each step runs auto-compaction and uses any messages it returns. */
+  compaction?: CompactConversationConfig;
+  /** Context status config. If set and the status has guidance, it is appended as a `<context_status>` user message. */
+  contextStatus?: {
+    maxTokens: number;
+    config?: ContextStatusConfig;
+  };
+  /** Plan mode state. While `isActive`, a `<plan_mode>` hint is appended as a user message (enforcement is via withContext). */
+  planModeState?: PlanModeState;
+  /** Custom prepareStep logic, run last with the prepared messages; if it returns `messages`, those are used instead. */
+  extend?: PrepareStepFunction<ToolSet>;
+}
+
+/**
+ * Build a prepareStep callback that layers three message-level concerns:
+ * - Auto-compaction of the conversation once its threshold is reached
+ * - Context status guidance, appended as a user message
+ * - A plan mode reminder, appended as a user message (backs up withContext)
+ *
+ * CRITICAL: The `system` prompt is never modified here — it stays static
+ * (set once in streamText({ system })) so Anthropic prompt caching holds.
+ * Only `messages` are rewritten.
+ *
+ * The result is a PrepareStepFunction usable with generateText/streamText.
+ */
+export function createPrepareStep(
+  config: PrepareStepConfig,
+): PrepareStepFunction<ToolSet> {
+  // Built once; stays null when no compaction config was supplied
+  const compactor = config.compaction
+    ? createAutoCompaction(config.compaction)
+    : null;
+
+  return async (args) => {
+    // 1. Compaction: adopt the compacted history when one is produced
+    let history = args.messages;
+    if (compactor) {
+      const compacted = await compactor.prepareStep(args);
+      if (compacted?.messages) {
+        history = compacted.messages as ModelMessage[];
+      }
+    }
+
+    // 2. Context status: surface any guidance as an extra user message
+    const extras: ModelMessage[] = [];
+    const statusConfig = config.contextStatus;
+    if (statusConfig) {
+      const { guidance } = getContextStatus(
+        history,
+        statusConfig.maxTokens,
+        statusConfig.config,
+      );
+      if (guidance) {
+        extras.push({
+          role: "user" as const,
+          content: `<context_status>${guidance}</context_status>`,
+        });
+      }
+    }
+
+    // 3. Plan mode: while active, remind the model with a user message
+    if (config.planModeState?.isActive) {
+      extras.push({
+        role: "user" as const,
+        content:
+          "<plan_mode>PLAN MODE ACTIVE — use read-only tools (Read, Grep, Glob) to gather information. Call ExitPlanMode when your plan is ready.</plan_mode>",
+      });
+    }
+
+    // Leave messages unset unless compaction or injection changed them,
+    // so the built-in logic adds no `messages` override to an untouched step
+    const untouched = history === args.messages && extras.length === 0;
+    const prepared = untouched ? undefined : [...history, ...extras];
+
+    // 4. Consumer hook: receives the prepared messages, may override them
+    const custom = config.extend
+      ? await config.extend({
+          ...args,
+          messages: prepared ?? args.messages,
+        })
+      : undefined;
+
+    // Built-in messages apply only when the hook returned none of its own
+    const result: PrepareStepResult<ToolSet> = {
+      ...custom,
+    };
+    if (prepared && !custom?.messages) {
+      result.messages = prepared;
+    }
+
+    return result;
+  };
+}
diff --git a/src/context/tool-guidance.ts b/src/context/tool-guidance.ts
new file mode 100644
index 0000000..1c4756d
--- /dev/null
+++ b/src/context/tool-guidance.ts
@@ -0,0 +1,46 @@
+export interface ToolGuidanceConfig {
+  /** Registered tool names; hints are emitted in this order, one line per tool that has a hint. */
+  tools: string[];
+  /** Per-tool one-line hints. Merged over the defaults, so an entry here replaces the default for that tool. */
+  hints?: Record<string, string>;
+  /** General guidelines, rendered as a bullet list under a "## Guidelines" heading when non-empty. */
+  guidelines?: string[];
+}
+
+// Built-in one-line hints keyed by tool name; `config.hints` entries override them.
+const DEFAULT_HINTS: Record<string, string> = {
+  Bash: "Execute shell commands. Prefer Read/Grep/Glob over bash for file exploration.",
+  Read: "Read files or list directories. Use offset/limit for large files.",
+  Write: "Create or overwrite files. Read first before overwriting.",
+  Edit: "Replace exact strings in files. Prefer over Write for modifications.",
+  Glob: "Find files by pattern. Faster than bash find.",
+  Grep: "Search file contents with regex. Faster than bash grep.",
+  WebSearch: "Search the web. Use for current information.",
+  WebFetch: "Fetch and analyze a URL.",
+  Task: "Spawn sub-agents for complex parallel work.",
+  TodoWrite: "Track multi-step task progress.",
+};
+
+/**
+ * Generate tool guidance text based on which tools are available.
+ * Only includes hints for tools that are actually registered, and only
+ * hints that were explicitly defined (built-in defaults or `config.hints`).
+ */
+export function buildToolGuidance(config: ToolGuidanceConfig): string {
+  // A Map resolves own entries only: a tool named "constructor" or
+  // "toString" must not pick up an inherited Object.prototype member.
+  const hints = new Map(
+    Object.entries({ ...DEFAULT_HINTS, ...config.hints }),
+  );
+  const lines = ["## Available Tools"];
+  for (const tool of config.tools) {
+    const hint = hints.get(tool);
+    if (hint) lines.push(`- **${tool}**: ${hint}`);
+  }
+  if (config.guidelines?.length) {
+    lines.push("", "## Guidelines");
+    for (const g of config.guidelines) lines.push(`- ${g}`);
+  }
+
+  return lines.join("\n");
+}
diff --git a/src/index.ts b/src/index.ts
index 719399f..7fd2ebc 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -116,6 +116,7 @@ export type {
   AskUserConfig,
   BudgetConfig,
   CacheConfig,
+  ContextConfig,
   ModelRegistryConfig,
   ModelRegistryProvider,
   PricingProvider,
@@ -126,6 +127,34 @@ export type {
 } from "./types";
 export { DEFAULT_CONFIG } from "./types";
 
+// Context layer (tool execution gating + output policy + prompt assembly)
+export type {
+  ContextLayer,
+  ExecutionPolicyConfig,
+  OutputPolicyConfig,
+  StashOutputConfig,
+  InstructionDiscoveryConfig,
+  DiscoveredInstructions,
+  EnvironmentContext,
+  EnvironmentContextConfig,
+  ToolGuidanceConfig,
+  SystemContextConfig,
+  SystemContext,
+  PrepareStepConfig,
+} from "./context";
+export {
+  withContext,
+  applyContextLayers,
+  createExecutionPolicy,
+  createOutputPolicy,
+  discoverInstructions,
+  collectEnvironment,
+  formatEnvironment,
+  buildToolGuidance,
+  buildSystemContext,
+  createPrepareStep,
+} from "./context";
+
 // Cache utilities
 export type {
   CachedTool,
diff --git a/src/tools/ask-user.ts b/src/tools/ask-user.ts
index a6c2760..3bc6bfe 100644
--- a/src/tools/ask-user.ts
+++ b/src/tools/ask-user.ts
@@ -28,9 +28,7 @@ const questionSchema = z.object({
     .describe(
       "Short header label shown in the UI (12 or fewer chars). Examples: 'Auth method', 'Library', 'Approach'.",
     ),
-  question: z
-    .string()
-    .describe("Single-sentence prompt shown to the user."),
+  question: z.string().describe("Single-sentence prompt shown to the user."),
   options: z
     .array(questionOptionSchema)
     .min(2)
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 73f3b28..5012533 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -1,6 +1,9 @@
-import type { ToolSet } from "ai";
+import type { ToolSet, Tool } from "ai";
 import type { CacheStore } from "../cache/types";
 import { cached, LRUCacheStore } from "../cache";
+import { applyContextLayers, type ContextLayer } from "../context/index";
+import { createExecutionPolicy } from "../context/execution-policy";
+import { createOutputPolicy } from "../context/output-policy";
 import type { Sandbox } from "../sandbox/interface";
 import type { AgentConfig, CacheConfig } from "../types";
 import { DEFAULT_CONFIG } from "../types";
@@ -128,6 +131,8 @@ export interface AgentToolsResult {
   budget?: BudgetTracker;
   /** Model info from OpenRouter (only present when modelRegistry or budget pricingProvider is configured) */
   openRouterModels?: Map<string, ModelInfo>;
+  /** Context layers applied to tools. Use with applyContextLayers() for late-added tools. */
+  contextLayers: ContextLayer[];
 }
 
 /**
@@ -216,7 +221,14 @@ export async function createAgentTools(
     tools.WebFetch = createWebFetchTool(config.webFetch);
   }
 
-  // Apply caching if configured
+  // Merge extra tools from context config
+  if (config?.context?.extraTools) {
+    for (const [name, extraTool] of Object.entries(config.context.extraTools)) {
+      (tools as Record<string, Tool>)[name] = extraTool;
+    }
+  }
+
+  // Apply caching if configured (inner wrapper — cache sits inside context)
   const cacheConfig = resolveCache(config?.cache);
   if (cacheConfig.store) {
     for (const [name, tool] of Object.entries(tools)) {
@@ -234,6 +246,42 @@ export async function createAgentTools(
     }
   }
 
+  // Build and apply context layers (outer wrapper — wraps outside cache)
+  const contextLayers: ContextLayer[] = [];
+
+  if (config?.context) {
+    // Execution policy (only if planMode is enabled)
+    if (planModeState && config.context.executionPolicy) {
+      contextLayers.push(
+        createExecutionPolicy(planModeState, config.context.executionPolicy),
+      );
+    }
+
+    // Output policy (enabled by default, unless explicitly false)
+    if (config.context.outputPolicy !== false) {
+      contextLayers.push(
+        createOutputPolicy(
+          config.context.outputPolicy === undefined
+            ? undefined
+            : config.context.outputPolicy,
+        ),
+      );
+    }
+
+    // Custom layers (run after built-in layers)
+    if (config.context.layers) {
+      contextLayers.push(...config.context.layers);
+    }
+
+    // Apply all layers to all tools
+    if (contextLayers.length > 0) {
+      const wrapped = applyContextLayers(tools, contextLayers);
+      for (const [name, wrappedTool] of Object.entries(wrapped)) {
+        (tools as Record<string, Tool>)[name] = wrappedTool;
+      }
+    }
+  }
+
   // Fetch model info from provider (hoisted before budget so both can share the data)
   let openRouterModels: Map<string, ModelInfo> | undefined;
 
@@ -285,7 +333,7 @@ export async function createAgentTools(
     });
   }
 
-  return { tools, planModeState, budget, openRouterModels };
+  return { tools, planModeState, budget, openRouterModels, contextLayers };
 }
 
 // --- Ask User Tool ---
diff --git a/src/tools/read.ts b/src/tools/read.ts
index 9033550..e511f43 100644
--- a/src/tools/read.ts
+++ b/src/tools/read.ts
@@ -163,8 +163,7 @@ export function createReadTool(sandbox: Sandbox, config?: ToolConfig) {
         const selectedLines = allLines.slice(startLine, endLine);
 
         // Per-line truncation (silent, like Claude Code's MAX_LINE_LENGTH)
-        const maxLineLength =
-          config?.maxLineLength ?? DEFAULT_MAX_LINE_LENGTH;
+        const maxLineLength = config?.maxLineLength ?? DEFAULT_MAX_LINE_LENGTH;
         const truncatedLines = selectedLines.map((line) =>
           line.length > maxLineLength
             ? line.slice(0, maxLineLength) + "…"
diff --git a/src/types.ts b/src/types.ts
index 042fd69..e6b8282 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -1,5 +1,8 @@
-import type { LanguageModel, Tool } from "ai";
+import type { LanguageModel, Tool, ToolSet } from "ai";
 import type { CacheStore } from "./cache/types";
+import type { ContextLayer } from "./context/index";
+import type { ExecutionPolicyConfig } from "./context/execution-policy";
+import type { OutputPolicyConfig } from "./context/output-policy";
 import type { SkillMetadata } from "./skills/types";
 import type { ModelPricing } from "./utils/budget-tracking";
 
@@ -157,6 +160,30 @@ export type BudgetConfig = {
   modelPricing?: Record<string, ModelPricing>;
 };
 
+/**
+ * Context layer configuration. Opt-in — if not provided, no context layers are applied.
+ * When provided, createAgentTools wraps every tool (bashkit + extraTools) with the layers below.
+ */
+export interface ContextConfig {
+  /** Execution policy (e.g., plan-mode blocking). createAgentTools installs it only when a plan-mode state exists. */
+  executionPolicy?: ExecutionPolicyConfig;
+  /**
+   * Output policy. Controls truncation behavior and redirection hints.
+   * Set to false to disable. Enabled by default when context is provided.
+   */
+  outputPolicy?: OutputPolicyConfig | false;
+  /**
+   * Extra tools to include alongside bashkit tools; an extra tool replaces a bashkit tool of the same name.
+   * All tools (bashkit + extra) get context layers applied.
+   */
+  extraTools?: ToolSet;
+  /**
+   * Custom context layers applied to all tools.
+   * Appended after the built-in layers (execution policy, output policy).
+   */
+  layers?: ContextLayer[];
+}
+
 export type AgentConfig = {
   tools?: {
     Bash?: ToolConfig;
@@ -183,6 +210,11 @@ export type AgentConfig = {
   modelRegistry?: ModelRegistryConfig;
   /** Budget tracking configuration */
   budget?: BudgetConfig;
+  /**
+   * Context layer config. Opt-in — if not provided, tools work as they do today.
+   * When provided, wraps all tools (bashkit + extraTools) with execution and output policies.
+   */
+  context?: ContextConfig;
   defaultTimeout?: number;
   workingDirectory?: string;
 };
diff --git a/tests/context/build-context.test.ts b/tests/context/build-context.test.ts
new file mode 100644
index 0000000..4cbe43b
--- /dev/null
+++ b/tests/context/build-context.test.ts
@@ -0,0 +1,434 @@
+import { describe, it, expect } from "vitest";
+import { discoverInstructions } from "@/context/instructions";
+import { collectEnvironment, formatEnvironment } from "@/context/environment";
+import { buildToolGuidance } from "@/context/tool-guidance";
+import { buildSystemContext } from "@/context/build-context";
+import { createMockSandbox } from "../helpers";
+
+// ---------------------------------------------------------------------------
+// discoverInstructions — instruction files gathered between the sandbox CWD and the .git root
+// ---------------------------------------------------------------------------
+
+describe("discoverInstructions", () => {
+  it("finds AGENTS.md walking from CWD to project root", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/project/AGENTS.md": "Project instructions",
+        "/project/src": ["index.ts"],
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project/src",
+    });
+    // CWD is /project/src; the only AGENTS.md sits one level up, next to .git.
+    const result = await discoverInstructions(sandbox);
+    expect(result).not.toBeNull();
+    expect(result!.text).toContain("Project instructions");
+    expect(result!.sources.length).toBeGreaterThan(0);
+  });
+
+  it("finds CLAUDE.md when AGENTS.md not present", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/project/CLAUDE.md": "Claude instructions",
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project",
+    });
+
+    const result = await discoverInstructions(sandbox);
+    expect(result).not.toBeNull();
+    expect(result!.text).toContain("Claude instructions");
+  });
+
+  it("merges multiple instruction files (root → CWD, most specific last)", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/project/AGENTS.md": "Root instructions",
+        "/project/src/AGENTS.md": "Src instructions",
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project/src",
+    });
+
+    const result = await discoverInstructions(sandbox);
+    expect(result).not.toBeNull();
+    // Root first, then src
+    const rootIdx = result!.text.indexOf("Root instructions");
+    const srcIdx = result!.text.indexOf("Src instructions");
+    expect(rootIdx).toBeLessThan(srcIdx);
+    expect(result!.sources).toHaveLength(2);
+  });
+
+  it("prepends global instructions when globalPath configured", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/home/.bashkit/AGENTS.md": "Global instructions",
+        "/project/.git": ["HEAD"],
+        "/project/AGENTS.md": "Project instructions",
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project",
+    });
+
+    const result = await discoverInstructions(sandbox, {
+      globalPath: "/home/.bashkit/AGENTS.md",
+    });
+    expect(result).not.toBeNull();
+    // Global first
+    const globalIdx = result!.text.indexOf("Global instructions");
+    const projectIdx = result!.text.indexOf("Project instructions");
+    expect(globalIdx).toBeLessThan(projectIdx);
+    expect(result!.sources[0].scope).toBe("global");
+  });
+
+  it("truncates at maxBytes", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/project/AGENTS.md": "x".repeat(1000),
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project",
+    });
+    // A 1000-character file against a 100-byte budget must come back truncated.
+    const result = await discoverInstructions(sandbox, { maxBytes: 100 });
+    expect(result).not.toBeNull();
+    expect(result!.text.length).toBeLessThanOrEqual(100);
+    expect(result!.sources.some((s) => s.truncated)).toBe(true);
+  });
+
+  it("returns null when no instruction files found", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/project/src": ["index.ts"],
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project/src",
+    });
+
+    const result = await discoverInstructions(sandbox);
+    expect(result).toBeNull();
+  });
+
+  it("stops walking at .git root marker", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/above/AGENTS.md": "Should not find this",
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project",
+    });
+    // NOTE(review): /above is not an ancestor of /project, so this may pass even without the .git stop — confirm.
+    const result = await discoverInstructions(sandbox);
+    expect(result).toBeNull(); // No files found at or below .git root
+  });
+
+  it("first matching filename wins per directory", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/project/AGENTS.md": "AGENTS content",
+        "/project/CLAUDE.md": "CLAUDE content",
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project",
+    });
+
+    const result = await discoverInstructions(sandbox);
+    expect(result).not.toBeNull();
+    expect(result!.text).toContain("AGENTS content");
+    expect(result!.text).not.toContain("CLAUDE content");
+    expect(result!.sources).toHaveLength(1);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// collectEnvironment — snapshot gathered through sandbox.exec; failed calls degrade to defaults
+// ---------------------------------------------------------------------------
+
+describe("collectEnvironment", () => {
+  it("returns CWD, shell, platform, date", async () => {
+    const sandbox = createMockSandbox({
+      execHandler: (cmd) => ({
+        stdout: cmd.includes("SHELL") ? "/bin/zsh\n" : "",
+        stderr: "",
+        exitCode: 0,
+        durationMs: 1,
+        interrupted: false,
+      }),
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/my/project",
+    });
+
+    const env = await collectEnvironment(sandbox);
+    expect(env.cwd).toBe("/my/project");
+    expect(env.shell).toBe("/bin/zsh");
+    expect(env.platform).toBeDefined();
+    expect(env.date).toMatch(/^\d{4}-\d{2}-\d{2}$/);
+  });
+
+  it("includes git branch when in a repo", async () => {
+    const sandbox = createMockSandbox({
+      execHandler: (cmd) => ({
+        stdout: cmd.includes("branch") ? "main\n" : "",
+        stderr: "",
+        exitCode: 0,
+        durationMs: 1,
+        interrupted: false,
+      }),
+    });
+
+    const env = await collectEnvironment(sandbox);
+    expect(env.gitBranch).toBe("main");
+  });
+
+  it("includes git changed file count", async () => {
+    const sandbox = createMockSandbox({
+      execHandler: (cmd) => ({
+        stdout: cmd.includes("status") ? "3\n" : "",
+        stderr: "",
+        exitCode: 0,
+        durationMs: 1,
+        interrupted: false,
+      }),
+    });
+
+    const env = await collectEnvironment(sandbox);
+    expect(env.gitStatus).toBe("3");
+  });
+
+  it("gracefully handles sandbox.exec failures", async () => {
+    const sandbox = createMockSandbox({
+      execHandler: () => {
+        throw new Error("exec failed");
+      },
+    });
+    // Every exec call throws: shell must fall back to "unknown" and optional fields stay unset.
+    const env = await collectEnvironment(sandbox);
+    expect(env.shell).toBe("unknown");
+    expect(env.gitBranch).toBeUndefined();
+    expect(env.gitStatus).toBeUndefined();
+    expect(env.timezone).toBeUndefined();
+  });
+
+  it("omits git info when git config disabled", async () => {
+    const execSpy: string[] = [];
+    const sandbox = createMockSandbox({
+      execHandler: (cmd) => {
+        execSpy.push(cmd);
+        return {
+          stdout: "",
+          stderr: "",
+          exitCode: 0,
+          durationMs: 1,
+          interrupted: false,
+        };
+      },
+    });
+
+    await collectEnvironment(sandbox, { git: false });
+    expect(execSpy.some((c) => c.includes("git"))).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// formatEnvironment — renders an environment snapshot as <environment_context> XML
+// ---------------------------------------------------------------------------
+
+describe("formatEnvironment", () => {
+  it("formats as XML with required fields", () => {
+    const xml = formatEnvironment({
+      cwd: "/project",
+      shell: "/bin/zsh",
+      platform: "darwin",
+      date: "2026-03-29",
+    });
+    expect(xml).toContain("<environment_context>");
+    expect(xml).toContain("<cwd>/project</cwd>");
+    expect(xml).toContain("<shell>/bin/zsh</shell>");
+    expect(xml).toContain("<platform>darwin</platform>");
+    expect(xml).toContain("<date>2026-03-29</date>");
+    expect(xml).toContain("</environment_context>");
+  });
+
+  it("includes optional fields when present", () => {
+    const xml = formatEnvironment({
+      cwd: "/project",
+      shell: "/bin/zsh",
+      platform: "darwin",
+      date: "2026-03-29",
+      timezone: "PST",
+      gitBranch: "feature/ctx",
+      gitStatus: "5",
+    });
+    expect(xml).toContain("<timezone>PST</timezone>");
+    expect(xml).toContain("<git_branch>feature/ctx</git_branch>");
+    expect(xml).toContain("<git_changed_files>5</git_changed_files>");
+  });
+
+  it("includes custom fields", () => {
+    const xml = formatEnvironment(
+      {
+        cwd: "/project",
+        shell: "/bin/zsh",
+        platform: "darwin",
+        date: "2026-03-29",
+      },
+      { app_version: "1.2.3", user: "josh" },
+    );
+    expect(xml).toContain("<app_version>1.2.3</app_version>");
+    expect(xml).toContain("<user>josh</user>");
+  });
+
+  it("omits optional fields when undefined", () => {
+    const xml = formatEnvironment({
+      cwd: "/project",
+      shell: "/bin/zsh",
+      platform: "darwin",
+      date: "2026-03-29",
+    });
+    expect(xml).not.toContain("timezone");
+    expect(xml).not.toContain("git_branch");
+    expect(xml).not.toContain("git_changed_files");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// buildToolGuidance — one hint line per listed tool that has a hint, plus optional guidelines
+// ---------------------------------------------------------------------------
+
+describe("buildToolGuidance", () => {
+  it("only includes hints for registered tool names", () => {
+    const text = buildToolGuidance({ tools: ["Bash", "Read"] });
+    expect(text).toContain("**Bash**");
+    expect(text).toContain("**Read**");
+    expect(text).not.toContain("**Write**");
+    expect(text).not.toContain("**Grep**");
+  });
+
+  it("merges custom hints with defaults", () => {
+    const text = buildToolGuidance({
+      tools: ["Bash", "Research"],
+      hints: { Research: "Query study data across the org." },
+    });
+    expect(text).toContain("**Bash**"); // built-in default hint still present
+    expect(text).toContain("**Research**: Query study data across the org.");
+  });
+
+  it("custom hints override defaults", () => {
+    const text = buildToolGuidance({
+      tools: ["Bash"],
+      hints: { Bash: "Custom bash hint" },
+    });
+    expect(text).toContain("Custom bash hint");
+    expect(text).not.toContain("Prefer Read/Grep/Glob");
+  });
+
+  it("includes custom guidelines", () => {
+    const text = buildToolGuidance({
+      tools: ["Bash"],
+      guidelines: [
+        "Prefer Grep over Bash for file search",
+        "Always use timeout",
+      ],
+    });
+    expect(text).toContain("## Guidelines");
+    expect(text).toContain("Prefer Grep over Bash");
+    expect(text).toContain("Always use timeout");
+  });
+
+  it("skips tools with no hint", () => {
+    const text = buildToolGuidance({
+      tools: ["Bash", "UnknownTool"],
+    });
+    expect(text).toContain("**Bash**");
+    expect(text).not.toContain("UnknownTool");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// buildSystemContext — per-section strings, a combined string, and source metadata
+// ---------------------------------------------------------------------------
+
+describe("buildSystemContext", () => {
+  it("returns individual sections and combined string", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/project/AGENTS.md": "Instructions here",
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project",
+    });
+
+    const ctx = await buildSystemContext(sandbox, {
+      instructions: true,
+      environment: true,
+      toolGuidance: { tools: ["Bash", "Read"] },
+    });
+
+    expect(ctx.instructions).toContain("Instructions here");
+    expect(ctx.environment).toContain("<environment_context>");
+    expect(ctx.toolGuidance).toContain("**Bash**");
+    expect(ctx.combined).toContain("Instructions here");
+    expect(ctx.combined).toContain("<environment_context>");
+    expect(ctx.combined).toContain("**Bash**");
+  });
+
+  it("omits null sections from combined string", async () => {
+    const sandbox = createMockSandbox();
+
+    const ctx = await buildSystemContext(sandbox, {
+      // No instructions, no environment, only tool guidance
+      toolGuidance: { tools: ["Bash"] },
+    });
+
+    expect(ctx.instructions).toBeNull();
+    expect(ctx.environment).toBeNull();
+    expect(ctx.toolGuidance).toContain("**Bash**");
+    expect(ctx.combined).toBe(ctx.toolGuidance);
+  });
+
+  it("includes metadata about sources", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/project/.git": ["HEAD"],
+        "/project/AGENTS.md": "test",
+      },
+    });
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project",
+    });
+
+    const ctx = await buildSystemContext(sandbox, {
+      instructions: true,
+      environment: true,
+    });
+
+    expect(ctx.meta.instructionSources).toBeDefined();
+    expect(ctx.meta.instructionSources!.length).toBeGreaterThan(0);
+    expect(ctx.meta.environmentContext).toBeDefined();
+    expect(ctx.meta.environmentContext!.cwd).toBe("/project");
+  });
+
+  it("returns empty combined when no sections configured", async () => {
+    const sandbox = createMockSandbox();
+    const ctx = await buildSystemContext(sandbox);
+    expect(ctx.combined).toBe("");
+  });
+});
diff --git a/tests/context/execution-policy.test.ts b/tests/context/execution-policy.test.ts
new file mode 100644
index 0000000..969cd36
--- /dev/null
+++ b/tests/context/execution-policy.test.ts
@@ -0,0 +1,140 @@
+import { describe, it, expect, vi } from "vitest";
+import type { PlanModeState } from "@/tools/enter-plan-mode";
+import { createExecutionPolicy } from "@/context/execution-policy";
+
+/** Call a layer's beforeExecute gate and await it, so sync and async results read alike; the tests treat undefined as "allowed". */
+async function gate(
+  layer: ReturnType<typeof createExecutionPolicy>,
+  toolName: string,
+  params: Record<string, unknown> = {},
+): Promise<{ error: string } | undefined> {
+  return await layer.beforeExecute!(toolName, params);
+}
+
+describe("createExecutionPolicy", () => {
+  it("blocks Bash/Write/Edit when plan mode is active", async () => {
+    const state: PlanModeState = { isActive: true };
+    const layer = createExecutionPolicy(state);
+
+    for (const toolName of ["Bash", "Write", "Edit"]) {
+      const result = await gate(layer, toolName);
+      expect(result).toBeDefined();
+      expect(result!.error).toContain("not available in plan mode");
+      expect(result!.error).toContain(toolName);
+    }
+  });
+
+  it("allows Read/Grep/Glob when plan mode is active", async () => {
+    const state: PlanModeState = { isActive: true };
+    const layer = createExecutionPolicy(state);
+
+    for (const toolName of ["Read", "Grep", "Glob"]) {
+      const result = await gate(layer, toolName);
+      expect(result).toBeUndefined();
+    }
+  });
+
+  it("allows everything when plan mode is inactive", async () => {
+    const state: PlanModeState = { isActive: false };
+    const layer = createExecutionPolicy(state);
+
+    for (const toolName of ["Bash", "Write", "Edit", "Read", "Grep", "Glob"]) {
+      expect(await gate(layer, toolName)).toBeUndefined();
+    }
+  });
+
+  it("reacts to plan mode state changes between calls", async () => {
+    const state: PlanModeState = { isActive: false };
+    const layer = createExecutionPolicy(state);
+
+    expect(await gate(layer, "Bash")).toBeUndefined();
+    // Mutating the shared state object must be seen by the already-built layer.
+    state.isActive = true;
+    expect(await gate(layer, "Bash")).toBeDefined();
+
+    state.isActive = false;
+    expect(await gate(layer, "Bash")).toBeUndefined();
+  });
+
+  it("supports custom blocked tool list", async () => {
+    const state: PlanModeState = { isActive: true };
+    const layer = createExecutionPolicy(state, {
+      planModeBlockedTools: ["Bash", "Research"],
+    });
+
+    // Bash still blocked
+    expect(await gate(layer, "Bash")).toBeDefined();
+    // Research now blocked
+    expect(await gate(layer, "Research")).toBeDefined();
+    // Write NOT blocked (not in custom list)
+    expect(await gate(layer, "Write")).toBeUndefined();
+    // Edit NOT blocked (not in custom list)
+    expect(await gate(layer, "Edit")).toBeUndefined();
+  });
+
+  it("supports custom shouldBlock predicate", async () => {
+    const state: PlanModeState = { isActive: false };
+    const shouldBlock = vi.fn(
+      (toolName: string, params: Record<string, unknown>) => {
+        if (
+          toolName === "Bash" &&
+          typeof params.command === "string" &&
+          params.command.includes("rm -rf")
+        ) {
+          return "Destructive command blocked";
+        }
+        return undefined;
+      },
+    );
+    // Plan mode is off in this case, so only the custom predicate can block.
+    const layer = createExecutionPolicy(state, { shouldBlock });
+
+    // Safe command allowed
+    expect(
+      await gate(layer, "Bash", { command: "echo hello" }),
+    ).toBeUndefined();
+
+    // Destructive command blocked
+    const result = await gate(layer, "Bash", { command: "rm -rf /" });
+    expect(result).toBeDefined();
+    expect(result!.error).toBe("Destructive command blocked");
+    expect(shouldBlock).toHaveBeenCalledTimes(2);
+  });
+
+  it("plan mode gate takes priority over shouldBlock", async () => {
+    const state: PlanModeState = { isActive: true };
+    const shouldBlock = vi.fn(() => undefined);
+
+    const layer = createExecutionPolicy(state, { shouldBlock });
+
+    // Plan mode blocks before shouldBlock is even called
+    const result = await gate(layer, "Bash");
+    expect(result).toBeDefined();
+    expect(result!.error).toContain("not available in plan mode");
+    expect(shouldBlock).not.toHaveBeenCalled();
+  });
+
+  it("shouldBlock runs when plan mode allows the tool", async () => {
+    const state: PlanModeState = { isActive: true };
+    const shouldBlock = vi.fn(() => "custom block");
+
+    const layer = createExecutionPolicy(state, { shouldBlock });
+
+    // Read is allowed by plan mode, so shouldBlock runs
+    const result = await gate(layer, "Read");
+    expect(result).toBeDefined();
+    expect(result!.error).toBe("custom block");
+    expect(shouldBlock).toHaveBeenCalledWith("Read", {});
+  });
+
+  it("error message includes guidance about read-only tools", async () => {
+    const state: PlanModeState = { isActive: true };
+    const layer = createExecutionPolicy(state);
+
+    const result = await gate(layer, "Bash");
+    expect(result!.error).toContain("Read");
+    expect(result!.error).toContain("Grep");
+    expect(result!.error).toContain("Glob");
+    expect(result!.error).toContain("ExitPlanMode");
+  });
+});
diff --git a/tests/context/integration.test.ts b/tests/context/integration.test.ts
new file mode 100644
index 0000000..9768dd1
--- /dev/null
+++ b/tests/context/integration.test.ts
@@ -0,0 +1,275 @@
+import { describe, it, expect, vi } from "vitest";
+import { tool, zodSchema } from "ai";
+import { z } from "zod";
+import { createAgentTools } from "@/tools/index";
+import { applyContextLayers, type ContextLayer } from "@/context/index";
+import {
+  createMockSandbox,
+  executeTool,
+  assertSuccess,
+  assertError,
+} from "../helpers";
+
+// Builds a throwaway AI SDK tool for these tests. `executeFn` is optional;
+// when omitted, the tool resolves to the canned payload { output: "extra" }.
+function createMockExtraTool(
+  executeFn?: (params: { input?: string | null }) => Promise<unknown>,
+) {
+  return tool({
+    description: "Extra tool for testing",
+    inputSchema: zodSchema(
+      z.object({ input: z.string().nullable().default(null) }),
+    ),
+    execute: executeFn ?? (async () => ({ output: "extra" })),
+  });
+}
+
+describe("createAgentTools with context config", () => {
+  it("no wrapping when context config omitted (backward compat)", async () => {
+    const sandbox = createMockSandbox({
+      files: { "/workspace/file.ts": "hello" },
+    });
+    const { tools, contextLayers } = await createAgentTools(sandbox);
+
+    // Tools work normally
+    const result = await executeTool(tools.Read, {
+      file_path: "/workspace/file.ts",
+    });
+    assertSuccess(result);
+
+    // No context layers
+    expect(contextLayers).toEqual([]);
+  });
+
+  it("applies execution policy when context + planMode provided", async () => {
+    const sandbox = createMockSandbox({
+      files: { "/workspace/file.ts": "content" },
+    });
+    const { tools, planModeState } = await createAgentTools(sandbox, {
+      planMode: true,
+      context: {
+        executionPolicy: { planModeBlockedTools: ["Bash", "Write", "Edit"] },
+        outputPolicy: false,
+      },
+    });
+
+    // Plan mode active → Bash blocked
+    planModeState!.isActive = true;
+    const bashResult = await executeTool(tools.Bash, {
+      command: "echo test",
+      description: "t",
+    });
+    assertError(bashResult);
+    expect(bashResult.error).toContain("not available in plan mode");
+
+    // Read still allowed
+    const readResult = await executeTool(tools.Read, {
+      file_path: "/workspace/file.ts",
+    });
+    assertSuccess(readResult);
+  });
+
+  it("applies output policy by default when context provided", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/workspace/large.ts": "x".repeat(50000),
+      },
+    });
+    const { tools } = await createAgentTools(sandbox, {
+      context: {
+        outputPolicy: { maxOutputLength: 100, redirectionThreshold: 50 },
+      },
+    });
+
+    const result = (await executeTool(tools.Read, {
+      file_path: "/workspace/large.ts",
+    })) as Record<string, unknown>;
+    assertSuccess(result);
+    // Output should be truncated
+    expect(result._hint).toBeDefined();
+  });
+
+  it("disables output policy when outputPolicy: false", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/workspace/large.ts": "x".repeat(50000),
+      },
+    });
+    const { tools } = await createAgentTools(sandbox, {
+      context: {
+        outputPolicy: false,
+      },
+    });
+
+    const result = (await executeTool(tools.Read, {
+      file_path: "/workspace/large.ts",
+    })) as Record<string, unknown>;
+    assertSuccess(result);
+    // No truncation hint
+    expect(result._hint).toBeUndefined();
+  });
+
+  it("wraps extraTools with same context layers", async () => {
+    const sandbox = createMockSandbox();
+    const customTool = createMockExtraTool(async () => ({
+      output: "x".repeat(200),
+    }));
+
+    const { tools, planModeState } = await createAgentTools(sandbox, {
+      planMode: true,
+      context: {
+        executionPolicy: {
+          planModeBlockedTools: ["Bash", "Write", "Edit", "CustomTool"],
+        },
+        outputPolicy: false,
+        extraTools: { CustomTool: customTool },
+      },
+    });
+
+    expect(tools.CustomTool).toBeDefined();
+
+    // CustomTool blocked in plan mode
+    planModeState!.isActive = true;
+    const result = await executeTool(tools.CustomTool, { input: "test" });
+    assertError(result);
+    expect(result.error).toContain("not available in plan mode");
+  });
+
+  it("returns contextLayers for wrapping tools added later", async () => {
+    const sandbox = createMockSandbox();
+    const { contextLayers } = await createAgentTools(sandbox, {
+      planMode: true,
+      context: {
+        executionPolicy: {},
+        outputPolicy: false,
+      },
+    });
+
+    expect(contextLayers).toBeDefined();
+    expect(contextLayers.length).toBeGreaterThan(0);
+
+    // Late-added tool gets same layers
+    const lateTool = createMockExtraTool(async () => ({ output: "late" }));
+    const wrapped = applyContextLayers({ LateTool: lateTool }, contextLayers);
+    expect(wrapped.LateTool).toBeDefined();
+  });
+
+  it("custom layers run when no built-in layers are active", async () => {
+    const sandbox = createMockSandbox({
+      files: { "/workspace/file.ts": "content" },
+    });
+    const calls: string[] = [];
+
+    // With planMode unset and outputPolicy off, this custom gate is the only layer.
+    const { tools } = await createAgentTools(sandbox, {
+      context: {
+        outputPolicy: false,
+        layers: [
+          {
+            beforeExecute: async () => {
+              calls.push("custom");
+              return undefined; // undefined lets the call proceed
+            },
+          },
+        ],
+      },
+    });
+
+    await executeTool(tools.Read, { file_path: "/workspace/file.ts" });
+    expect(calls).toContain("custom");
+  });
+
+  it("late-added tools via applyContextLayers enforce execution policy", async () => {
+    const sandbox = createMockSandbox();
+    const { contextLayers, planModeState } = await createAgentTools(sandbox, {
+      planMode: true,
+      context: {
+        executionPolicy: {
+          planModeBlockedTools: ["Bash", "Write", "Edit", "LateTool"],
+        },
+        outputPolicy: false,
+      },
+    });
+
+    planModeState!.isActive = true;
+
+    const lateTool = createMockExtraTool(async () => ({ output: "late" }));
+    const wrapped = applyContextLayers({ LateTool: lateTool }, contextLayers);
+
+    const result = await executeTool(wrapped.LateTool, { input: "test" });
+    assertError(result);
+    expect(result.error).toContain("not available in plan mode");
+  });
+
+  it("execution + output policy both active end-to-end", async () => {
+    const sandbox = createMockSandbox({
+      files: { "/workspace/big.ts": "x".repeat(50000) },
+    });
+    const { tools, planModeState } = await createAgentTools(sandbox, {
+      planMode: true,
+      context: {
+        executionPolicy: { planModeBlockedTools: ["Bash", "Write", "Edit"] },
+        outputPolicy: { maxOutputLength: 100, redirectionThreshold: 50 },
+      },
+    });
+
+    // Read works and gets truncated
+    const readResult = (await executeTool(tools.Read, {
+      file_path: "/workspace/big.ts",
+    })) as Record<string, unknown>;
+    assertSuccess(readResult);
+    expect(readResult._hint).toBeDefined();
+
+    // Bash blocked by execution policy (output policy never reached)
+    planModeState!.isActive = true;
+    const bashResult = await executeTool(tools.Bash, {
+      command: "echo test",
+      description: "t",
+    });
+    assertError(bashResult);
+  });
+
+  it("extraTools accessible alongside built-in tools", async () => {
+    const sandbox = createMockSandbox({
+      files: { "/workspace/file.ts": "content" },
+    });
+    const customTool = createMockExtraTool(async () => ({
+      output: "custom result",
+    }));
+
+    const { tools } = await createAgentTools(sandbox, {
+      context: {
+        outputPolicy: false,
+        extraTools: { CustomTool: customTool },
+      },
+    });
+
+    // Built-in tool works
+    const readResult = await executeTool(tools.Read, {
+      file_path: "/workspace/file.ts",
+    });
+    assertSuccess(readResult);
+
+    // Extra tool works
+    const customResult = (await executeTool(tools.CustomTool, {
+      input: "test",
+    })) as Record<string, unknown>;
+    assertSuccess(customResult);
+    expect(customResult.output).toBe("custom result");
+  });
+
+  it("execution policy not created without planMode enabled", async () => {
+    const sandbox = createMockSandbox();
+    const { contextLayers } = await createAgentTools(sandbox, {
+      // planMode NOT set
+      context: {
+        executionPolicy: { planModeBlockedTools: ["Bash"] },
+        outputPolicy: false,
+      },
+    });
+
+    // No execution policy layer since planMode not enabled
+    // Only custom layers would be here (none provided)
+    expect(contextLayers).toHaveLength(0);
+  });
+});
diff --git a/tests/context/output-policy.test.ts b/tests/context/output-policy.test.ts
new file mode 100644
index 0000000..59d520a
--- /dev/null
+++ b/tests/context/output-policy.test.ts
@@ -0,0 +1,517 @@
+import { describe, it, expect, vi } from "vitest";
+import { createOutputPolicy } from "@/context/output-policy";
+import { createMockSandbox } from "../helpers";
+
+/** Runs the layer's afterExecute hook and resolves to whatever it yields. */
+async function transform(
+  layer: ReturnType<typeof createOutputPolicy>,
+  toolName: string,
+  params: Record<string, unknown>,
+  result: Record<string, unknown>,
+): Promise<Record<string, unknown>> {
+  return layer.afterExecute!(toolName, params, result);
+}
+
+describe("createOutputPolicy", () => {
+  // -----------------------------------------------------------------------
+  // Basic truncation behavior
+  // -----------------------------------------------------------------------
+
+  it("passes through results below redirection threshold", async () => {
+    const layer = createOutputPolicy({ redirectionThreshold: 100 });
+    const result = { stdout: "short output" };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(transformed).toEqual(result);
+  });
+
+  it("truncates results above maxOutputLength", async () => {
+    const layer = createOutputPolicy({
+      maxOutputLength: 100,
+      redirectionThreshold: 50,
+    });
+    const result = { stdout: "x".repeat(200) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(typeof transformed.stdout).toBe("string");
+    expect((transformed.stdout as string).length).toBeLessThan(200);
+    expect(transformed._hint).toBeDefined();
+  });
+
+  it("passes through when text equals threshold exactly", async () => {
+    const layer = createOutputPolicy({ redirectionThreshold: 20 });
+    const result = { stdout: "x".repeat(20) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(transformed).toEqual(result);
+  });
+
+  // -----------------------------------------------------------------------
+  // Per-tool hints
+  // -----------------------------------------------------------------------
+
+  it("injects Bash-specific redirection hint", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    const hint = transformed._hint as string;
+    expect(hint).toContain("head");
+    expect(hint).toContain("tail");
+    expect(hint).toContain("grep");
+  });
+
+  it("injects Read-specific redirection hint", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    const result = { content: "x".repeat(100) };
+    const transformed = await transform(layer, "Read", {}, result);
+    const hint = transformed._hint as string;
+    expect(hint).toContain("offset");
+    expect(hint).toContain("limit");
+  });
+
+  it("injects Grep-specific redirection hint", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Grep", {}, result);
+    const hint = transformed._hint as string;
+    expect(hint).toContain("head_limit");
+    expect(hint).toContain("offset");
+  });
+
+  it("injects generic hint for unknown tools", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "SqlQuery", {}, result);
+    const hint = transformed._hint as string;
+    expect(hint).toContain("Read");
+    expect(hint).toContain("Grep");
+  });
+
+  // -----------------------------------------------------------------------
+  // Custom hints map
+  // -----------------------------------------------------------------------
+
+  it("custom hints map overrides built-in hints", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      hints: { Bash: "Custom bash hint" },
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(transformed._hint).toBe("Custom bash hint");
+  });
+
+  it("custom hints map works for external tools", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      hints: { Research: "Full data on disk. Use Read/Grep." },
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Research", {}, result);
+    expect(transformed._hint).toBe("Full data on disk. Use Read/Grep.");
+  });
+
+  // -----------------------------------------------------------------------
+  // Custom buildHint callback
+  // -----------------------------------------------------------------------
+
+  it("buildHint callback overrides all other hints", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      hints: { Bash: "should not see this" },
+      buildHint: (_toolName, _params, len) => `Custom: ${len} chars truncated`,
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(transformed._hint).toBe("Custom: 100 chars truncated");
+  });
+
+  it("buildHint receives result object", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      buildHint: (toolName, _params, _len, result) => {
+        if (toolName === "Research" && "queryHint" in result) {
+          return `Data at ${result.file}. ${result.queryHint}`;
+        }
+        return undefined;
+      },
+    });
+    // Serialize of this will exceed threshold
+    const result = {
+      file: "/tmp/research/data.jsonl",
+      queryHint: "use jq",
+      data: "x".repeat(100),
+    };
+    const transformed = await transform(layer, "Research", {}, result);
+    expect(transformed._hint).toBe("Data at /tmp/research/data.jsonl. use jq");
+  });
+
+  it("buildHint falls through to hints map when returning undefined", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      hints: { Bash: "Hints map fallback" },
+      buildHint: (toolName) => {
+        if (toolName === "Research") return "Research hint";
+        return undefined;
+      },
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(transformed._hint).toBe("Hints map fallback");
+  });
+
+  // -----------------------------------------------------------------------
+  // excludeTools
+  // -----------------------------------------------------------------------
+
+  it("excludes tools in excludeTools list", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      excludeTools: ["Research"],
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Research", {}, result);
+    expect(transformed).toEqual(result); // unchanged
+  });
+
+  // -----------------------------------------------------------------------
+  // Custom truncation function
+  // -----------------------------------------------------------------------
+
+  it("uses custom truncation function", async () => {
+    const customTruncate = vi.fn(
+      (text: string, maxLen: number) => text.slice(0, maxLen) + "...CUSTOM",
+    );
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      truncate: customTruncate,
+    });
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(customTruncate).toHaveBeenCalledWith("x".repeat(100), 50);
+    expect((transformed.stdout as string).endsWith("...CUSTOM")).toBe(true);
+  });
+
+  // -----------------------------------------------------------------------
+  // Structured output handling
+  // -----------------------------------------------------------------------
+
+  it("handles objects with stdout field", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    const result = { stdout: "x".repeat(100), exitCode: 0 };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(typeof transformed.stdout).toBe("string");
+    expect(transformed.exitCode).toBe(0); // preserved
+    expect(transformed._hint).toBeDefined();
+  });
+
+  it("handles objects with content field", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    const result = { content: "x".repeat(100), lineCount: 50 };
+    const transformed = await transform(layer, "Read", {}, result);
+    expect(typeof transformed.content).toBe("string");
+    expect(transformed.lineCount).toBe(50); // preserved
+    expect(transformed._hint).toBeDefined();
+  });
+
+  it("handles structured results via JSON serialization", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    // No stdout or content — falls through to JSON serialization
+    const result = { data: "x".repeat(100), total: 42 };
+    const transformed = await transform(layer, "Custom", {}, result);
+    expect(transformed._truncated).toBeDefined();
+    expect(transformed._hint).toBeDefined();
+  });
+
+  // -----------------------------------------------------------------------
+  // Priority: stdout > content > JSON serialization
+  //
+  // When a result carries several candidate fields, only the highest-
+  // priority one is truncated; the test below checks that `content` is left
+  // untouched when `stdout` is present.
+  // -----------------------------------------------------------------------
+
+  it("stdout takes priority over content when both present", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    const result = { stdout: "x".repeat(100), content: "y".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    // stdout was truncated, content preserved as-is
+    expect(typeof transformed.stdout).toBe("string");
+    expect((transformed.stdout as string).length).toBeLessThan(100);
+    expect(transformed.content).toBe("y".repeat(100));
+  });
+
+  // -----------------------------------------------------------------------
+  // Error results
+  // -----------------------------------------------------------------------
+
+  it("error results pass through when below threshold", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 200, // high enough for serialized error
+      maxOutputLength: 300,
+    });
+    const result = { error: "something went wrong" };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(transformed).toEqual(result);
+  });
+
+  it("large error results still get truncated", async () => {
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+    });
+    // Error with a very large message — serialized JSON exceeds threshold
+    const result = { error: "x".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    expect(transformed._hint).toBeDefined();
+  });
+
+  // -----------------------------------------------------------------------
+  // stashOutput + hint combinations
+  // -----------------------------------------------------------------------
+
+  it("stashOutput + hints map: file path prepended to custom hint", async () => {
+    const sandbox = createMockSandbox();
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      hints: { Bash: "Re-run with | head" },
+      stashOutput: {
+        sandbox,
+        tools: ["Bash"],
+      },
+    });
+
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    const hint = transformed._hint as string;
+    expect(hint).toContain("Full output saved to");
+    expect(hint).toContain("Re-run with | head");
+  });
+
+  it("stashOutput + buildHint: file path prepended to callback hint", async () => {
+    const sandbox = createMockSandbox();
+    const layer = createOutputPolicy({
+      redirectionThreshold: 10,
+      maxOutputLength: 50,
+      buildHint: () => "Custom callback hint",
+      stashOutput: {
+        sandbox,
+        tools: ["Bash"],
+      },
+    });
+
+    const result = { stdout: "x".repeat(100) };
+    const transformed = await transform(layer, "Bash", {}, result);
+    const hint = transformed._hint as string;
+    expect(hint).toContain("Full output saved to");
+    expect(hint).toContain("Custom callback hint");
+  });
+
+  // -----------------------------------------------------------------------
+  // stashOutput
+  // -----------------------------------------------------------------------
+
+  describe("stashOutput", () => {
+    it("writes full result to disk before truncating", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      const transformed = await transform(layer, "Bash", {}, result);
+
+      // File was written
+      const files = sandbox.getFiles();
+      const paths = Object.keys(files);
+      const stashPath = paths.find((p) =>
+        p.startsWith("/tmp/.bashkit/tool-output/Bash-"),
+      );
+      expect(stashPath).toBeDefined();
+      expect(files[stashPath!]).toBe("x".repeat(100));
+
+      // Hint includes file path
+      expect(transformed._hint).toContain(stashPath!);
+    });
+
+    it("hint includes stash file path", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      const transformed = await transform(layer, "Bash", {}, result);
+      expect(transformed._hint as string).toContain("Full output saved to");
+    });
+
+    it("does not write for tools not in the tools list", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      await transform(layer, "Grep", {}, result);
+
+      const files = sandbox.getFiles();
+      const stashPaths = Object.keys(files).filter((p) =>
+        p.startsWith("/tmp/.bashkit/tool-output/"),
+      );
+      expect(stashPaths).toHaveLength(0);
+    });
+
+    it("creates dir via mkdir -p", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      await transform(layer, "Bash", {}, result);
+
+      const history = sandbox.getExecHistory();
+      expect(history.some((e) => e.command.includes("mkdir -p"))).toBe(true);
+    });
+
+    it("does not write when output is below threshold", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 200,
+        maxOutputLength: 300,
+        stashOutput: {
+          sandbox,
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "short" };
+      const transformed = await transform(layer, "Bash", {}, result);
+
+      expect(transformed).toEqual(result); // unchanged
+      const files = sandbox.getFiles();
+      expect(
+        Object.keys(files).filter((p) =>
+          p.startsWith("/tmp/.bashkit/tool-output/"),
+        ),
+      ).toHaveLength(0);
+    });
+
+    it("pathFor uses custom path when provided", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          tools: ["Research"],
+          pathFor: (toolName, _params, result) => {
+            if (toolName === "Research" && "kind" in result) {
+              return `/tmp/research/${String(result.kind)}.jsonl`;
+            }
+            return undefined;
+          },
+        },
+      });
+
+      const result = { stdout: "x".repeat(100), kind: "search_messages" };
+      await transform(layer, "Research", {}, result);
+
+      const files = sandbox.getFiles();
+      expect(files["/tmp/research/search_messages.jsonl"]).toBeDefined();
+    });
+
+    it("pathFor falls back to default when returning undefined", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          tools: ["Bash"],
+          pathFor: () => undefined,
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      await transform(layer, "Bash", {}, result);
+
+      const files = sandbox.getFiles();
+      const stashPath = Object.keys(files).find((p) =>
+        p.startsWith("/tmp/.bashkit/tool-output/Bash-"),
+      );
+      expect(stashPath).toBeDefined();
+    });
+
+    it("uses custom stash dir", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          dir: "/custom/output",
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      await transform(layer, "Bash", {}, result);
+
+      const files = sandbox.getFiles();
+      const stashPath = Object.keys(files).find((p) =>
+        p.startsWith("/custom/output/Bash-"),
+      );
+      expect(stashPath).toBeDefined();
+    });
+  });
+});
diff --git a/tests/context/parallel.test.ts b/tests/context/parallel.test.ts
new file mode 100644
index 0000000..f4cb2ce
--- /dev/null
+++ b/tests/context/parallel.test.ts
@@ -0,0 +1,175 @@
+import { describe, it, expect } from "vitest";
+import type { PlanModeState } from "@/tools/enter-plan-mode";
+import { createExecutionPolicy } from "@/context/execution-policy";
+import { applyContextLayers, type ContextLayer } from "@/context/index";
+import { createAgentTools } from "@/tools/index";
+import {
+  createMockSandbox,
+  executeTool,
+  assertSuccess,
+  assertError,
+} from "../helpers";
+
+describe("context layer under parallel execution", () => {
+  it("beforeExecute runs independently for each parallel call", async () => {
+    const callLog: string[] = [];
+    const layer: ContextLayer = {
+      beforeExecute: async (toolName) => {
+        callLog.push(`before:${toolName}`);
+        await new Promise((r) => setTimeout(r, 10));
+        callLog.push(`before-done:${toolName}`);
+        return undefined;
+      },
+    };
+
+    const sandbox = createMockSandbox({
+      files: {
+        "/workspace/file.ts": "content",
+        "/workspace/file2.ts": "content2",
+      },
+    });
+    const { tools } = await createAgentTools(sandbox, {
+      context: {
+        outputPolicy: false,
+        layers: [layer],
+      },
+    });
+
+    // Simulate parallel dispatch
+    await Promise.all([
+      executeTool(tools.Read, { file_path: "/workspace/file.ts" }),
+      executeTool(tools.Read, { file_path: "/workspace/file2.ts" }),
+    ]);
+
+    // Both calls should have their before entries
+    const beforeStarts = callLog.filter((l) => l.startsWith("before:"));
+    expect(beforeStarts).toHaveLength(2);
+  });
+
+  it("rejection in one parallel call doesn't block others", async () => {
+    const sandbox = createMockSandbox({
+      files: { "/workspace/file.ts": "content" },
+    });
+    const { tools, planModeState } = await createAgentTools(sandbox, {
+      planMode: true,
+      context: {
+        executionPolicy: { planModeBlockedTools: ["Bash"] },
+        outputPolicy: false,
+      },
+    });
+
+    planModeState!.isActive = true;
+
+    const [bashResult, readResult] = await Promise.all([
+      executeTool(tools.Bash, { command: "echo test", description: "t" }),
+      executeTool(tools.Read, { file_path: "/workspace/file.ts" }),
+    ]);
+
+    assertError(bashResult);
+    assertSuccess(readResult);
+  });
+
+  it("plan mode state change mid-parallel does not corrupt results", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/workspace/a.ts": "aaa",
+        "/workspace/b.ts": "bbb",
+      },
+    });
+    const { tools, planModeState } = await createAgentTools(sandbox, {
+      planMode: true,
+      context: {
+        executionPolicy: { planModeBlockedTools: ["Bash"] },
+        outputPolicy: false,
+      },
+    });
+
+    planModeState!.isActive = false;
+
+    // Fire off parallel reads, toggle plan mode during execution
+    const results = await Promise.all([
+      executeTool(tools.Read, { file_path: "/workspace/a.ts" }),
+      (async () => {
+        // Small delay then toggle — reads should still complete
+        await new Promise((r) => setTimeout(r, 1));
+        planModeState!.isActive = true;
+        return executeTool(tools.Read, { file_path: "/workspace/b.ts" });
+      })(),
+    ]);
+
+    // Both reads should succeed (Read is not blocked)
+    assertSuccess(results[0]);
+    assertSuccess(results[1]);
+  });
+
+  it("stashOutput under parallel calls writes separate files", async () => {
+    const sandbox = createMockSandbox({
+      files: {
+        "/workspace/a.ts": "a".repeat(100),
+        "/workspace/b.ts": "b".repeat(100),
+      },
+    });
+    const { tools } = await createAgentTools(sandbox, {
+      context: {
+        outputPolicy: {
+          maxOutputLength: 50,
+          redirectionThreshold: 10,
+          stashOutput: {
+            sandbox,
+            tools: ["Read"],
+          },
+        },
+      },
+    });
+
+    await Promise.all([
+      executeTool(tools.Read, { file_path: "/workspace/a.ts" }),
+      executeTool(tools.Read, { file_path: "/workspace/b.ts" }),
+    ]);
+
+    // Should have two separate stash files
+    const files = sandbox.getFiles();
+    const stashPaths = Object.keys(files).filter((p) =>
+      p.startsWith("/tmp/.bashkit/tool-output/Read-"),
+    );
+    expect(stashPaths.length).toBe(2);
+  });
+
+  it("afterExecute transforms are isolated between parallel calls", async () => {
+    let callCount = 0;
+    const layer: ContextLayer = {
+      afterExecute: async (_toolName, _params, result) => {
+        // Capture this call's sequence number before the delay; the delay
+        // encourages the two parallel calls to interleave.
+        const myCount = ++callCount;
+        await new Promise((r) => setTimeout(r, 5));
+        return { ...result, _callNumber: myCount };
+      },
+    };
+
+    const sandbox = createMockSandbox({
+      files: {
+        "/workspace/a.ts": "aaa",
+        "/workspace/b.ts": "bbb",
+      },
+    });
+    const { tools } = await createAgentTools(sandbox, {
+      context: {
+        outputPolicy: false,
+        layers: [layer],
+      },
+    });
+
+    const results = await Promise.all([
+      executeTool(tools.Read, { file_path: "/workspace/a.ts" }),
+      executeTool(tools.Read, { file_path: "/workspace/b.ts" }),
+    ]);
+
+    const r0 = results[0] as Record<string, unknown>;
+    const r1 = results[1] as Record<string, unknown>;
+
+    // Each call must report its own number: together exactly {1, 2}. Comparing
+    // as a Set is order-independent and avoids a comparator-less sort().
+    expect(new Set([r0._callNumber, r1._callNumber])).toEqual(new Set([1, 2]));
+  });
+});
diff --git a/tests/context/prepare-step.test.ts b/tests/context/prepare-step.test.ts
new file mode 100644
index 0000000..5c41484
--- /dev/null
+++ b/tests/context/prepare-step.test.ts
@@ -0,0 +1,196 @@
+import { describe, it, expect, vi } from "vitest";
+import type { ModelMessage } from "ai";
+import type { PlanModeState } from "@/tools/enter-plan-mode";
+import { createPrepareStep } from "@/context/prepare-step";
+
+function makeMessages(count: number): ModelMessage[] {
+  const messages: ModelMessage[] = [];
+  for (let i = 0; i < count; i++) {
+    messages.push({
+      role: i % 2 === 0 ? "user" : "assistant",
+      content: `Message ${i}: ${"x".repeat(100)}`,
+    });
+  }
+  return messages;
+}
+
+const defaultArgs = {
+  stepNumber: 1,
+  steps: [],
+  model: {} as never,
+  experimental_context: undefined,
+};
+
+describe("createPrepareStep", () => {
+  it("does NOT return system field (never overrides system prompt)", async () => {
+    const prepareStep = createPrepareStep({});
+    const result = await prepareStep({
+      ...defaultArgs,
+      messages: makeMessages(2),
+    });
+    expect(result ?? {}).not.toHaveProperty("system");
+  });
+
+  it("returns undefined messages when nothing changes", async () => {
+    const prepareStep = createPrepareStep({});
+    const messages = makeMessages(2);
+    const result = await prepareStep({
+      ...defaultArgs,
+      messages,
+    });
+    expect(result?.messages).toBeUndefined();
+  });
+
+  it("injects plan mode hint as user message when active", async () => {
+    const state: PlanModeState = { isActive: true };
+    const prepareStep = createPrepareStep({ planModeState: state });
+    const messages = makeMessages(2);
+    const result = await prepareStep({
+      ...defaultArgs,
+      messages,
+    });
+
+    expect(result?.messages).toBeDefined();
+    const lastMsg = result.messages![result.messages!.length - 1];
+    expect(lastMsg.role).toBe("user");
+    expect(lastMsg.content).toContain("PLAN MODE ACTIVE");
+  });
+
+  it("does not inject plan mode hint when inactive", async () => {
+    const state: PlanModeState = { isActive: false };
+    const prepareStep = createPrepareStep({ planModeState: state });
+    const messages = makeMessages(2);
+    const result = await prepareStep({
+      ...defaultArgs,
+      messages,
+    });
+
+    expect(result?.messages).toBeUndefined();
+  });
+
+  it("calls extend callback after built-in logic", async () => {
+    const extendSpy = vi.fn(async () => ({
+      activeTools: ["Read", "Grep"],
+    }));
+
+    const prepareStep = createPrepareStep({
+      extend: extendSpy,
+    });
+
+    const messages = makeMessages(2);
+    const result = await prepareStep({
+      ...defaultArgs,
+      messages,
+    });
+
+    expect(extendSpy).toHaveBeenCalledTimes(1);
+    expect(result?.activeTools).toEqual(["Read", "Grep"]);
+  });
+
+  it("extend receives messages from built-in logic", async () => {
+    const state: PlanModeState = { isActive: true };
+    let receivedMessages: ModelMessage[] | undefined;
+
+    const prepareStep = createPrepareStep({
+      planModeState: state,
+      extend: async (args) => {
+        receivedMessages = args.messages;
+        return {};
+      },
+    });
+
+    const messages = makeMessages(2);
+    await prepareStep({ ...defaultArgs, messages });
+
+    // extend should get the messages with plan mode hint injected
+    expect(receivedMessages).toBeDefined();
+    expect(receivedMessages!.length).toBeGreaterThan(messages.length);
+    const lastMsg = receivedMessages![receivedMessages!.length - 1];
+    expect(lastMsg.content).toContain("PLAN MODE ACTIVE");
+  });
+
+  it("extend returning messages takes priority over built-in messages", async () => {
+    const state: PlanModeState = { isActive: true };
+    const customMessages: ModelMessage[] = [
+      { role: "user", content: "custom message only" },
+    ];
+
+    const prepareStep = createPrepareStep({
+      planModeState: state,
+      extend: async () => ({
+        messages: customMessages,
+      }),
+    });
+
+    const messages = makeMessages(2);
+    const result = await prepareStep({ ...defaultArgs, messages });
+
+    // extend's messages should win
+    expect(result?.messages).toEqual(customMessages);
+  });
+
+  it("plan mode + context status both active simultaneously", async () => {
+    const state: PlanModeState = { isActive: true };
+    const prepareStep = createPrepareStep({
+      planModeState: state,
+      contextStatus: {
+        maxTokens: 100, // Very low — will trigger high/critical
+      },
+    });
+
+    // Many messages to push past threshold
+    const messages = makeMessages(20);
+    const result = await prepareStep({ ...defaultArgs, messages });
+
+    expect(result?.messages).toBeDefined();
+    const injected = result.messages!.slice(messages.length);
+    const contents = injected.map((m) =>
+      typeof m.content === "string" ? m.content : "",
+    );
+
+    // NOTE(review): only the plan-mode hint is asserted below; the context-status hint is not checked — TODO add that assertion
+    expect(contents.some((c) => c.includes("PLAN MODE"))).toBe(true);
+  });
+
+  it("preserves original messages when only extend runs", async () => {
+    const prepareStep = createPrepareStep({
+      extend: async () => ({
+        activeTools: ["Read"],
+      }),
+    });
+
+    const messages = makeMessages(2);
+    const result = await prepareStep({ ...defaultArgs, messages });
+
+    // No message modification from built-in logic
+    expect(result?.messages).toBeUndefined();
+    expect(result?.activeTools).toEqual(["Read"]);
+  });
+
+  it("reacts to plan mode state changes between steps", async () => {
+    const state: PlanModeState = { isActive: false };
+    const prepareStep = createPrepareStep({ planModeState: state });
+    const messages = makeMessages(2);
+
+    // Step 1: inactive
+    const r1 = await prepareStep({ ...defaultArgs, messages });
+    expect(r1?.messages).toBeUndefined();
+
+    // Step 2: active
+    state.isActive = true;
+    const r2 = await prepareStep({ ...defaultArgs, messages });
+    expect(r2?.messages).toBeDefined();
+    const hasHint = r2.messages!.some(
+      (m) =>
+        m.role === "user" &&
+        typeof m.content === "string" &&
+        m.content.includes("PLAN MODE"),
+    );
+    expect(hasHint).toBe(true);
+
+    // Step 3: inactive again
+    state.isActive = false;
+    const r3 = await prepareStep({ ...defaultArgs, messages });
+    expect(r3?.messages).toBeUndefined();
+  });
+});
diff --git a/tests/context/with-context.test.ts b/tests/context/with-context.test.ts
new file mode 100644
index 0000000..3aa6697
--- /dev/null
+++ b/tests/context/with-context.test.ts
@@ -0,0 +1,344 @@
+import { describe, it, expect, vi } from "vitest";
+import { tool, zodSchema } from "ai";
+import { z } from "zod";
+import {
+  withContext,
+  applyContextLayers,
+  type ContextLayer,
+} from "@/context/index";
+import {
+  createMockSandbox,
+  executeTool,
+  assertSuccess,
+  assertError,
+} from "../helpers";
+import { createReadTool } from "@/tools/read";
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+type MockParams = { input?: string | null };
+
+function createMockTool(executeFn?: (params: MockParams) => Promise<unknown>) {
+  return tool({
+    description: "Mock tool for testing",
+    inputSchema: zodSchema(
+      z.object({
+        input: z.string().nullable().default(null),
+      }),
+    ),
+    execute:
+      (executeFn as (params: MockParams) => Promise<unknown>) ??
+      (async () => ({ output: "default" })),
+  });
+}
+
+// ---------------------------------------------------------------------------
+// withContext()
+// ---------------------------------------------------------------------------
+
+describe("withContext", () => {
+  it("passes through when no layers reject or transform", async () => {
+    const mockTool = createMockTool(async () => ({ output: "hello" }));
+    const wrapped = withContext(mockTool, "Mock", []);
+    const result = await executeTool(wrapped, { input: "test" });
+    assertSuccess(result);
+    expect((result as Record<string, unknown>).output).toBe("hello");
+  });
+
+  it("returns error from beforeExecute without calling execute", async () => {
+    const executeSpy = vi.fn(async () => ({ output: "should not run" }));
+    const mockTool = createMockTool(executeSpy);
+    const layer: ContextLayer = {
+      beforeExecute: async () => ({ error: "blocked" }),
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    const result = await executeTool(wrapped, { input: "test" });
+    assertError(result);
+    expect(result.error).toBe("blocked");
+    expect(executeSpy).not.toHaveBeenCalled();
+  });
+
+  it("supports sync beforeExecute", async () => {
+    const mockTool = createMockTool();
+    const layer: ContextLayer = {
+      beforeExecute: () => ({ error: "sync block" }),
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    const result = await executeTool(wrapped, { input: "test" });
+    assertError(result);
+    expect(result.error).toBe("sync block");
+  });
+
+  it("transforms result through afterExecute", async () => {
+    const mockTool = createMockTool(async () => ({ output: "original" }));
+    const layer: ContextLayer = {
+      afterExecute: async (_toolName, _params, result) => ({
+        ...result,
+        _transformed: true,
+      }),
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    const result = await executeTool(wrapped, { input: "test" });
+    assertSuccess(result);
+    expect((result as Record<string, unknown>)._transformed).toBe(true);
+    expect((result as Record<string, unknown>).output).toBe("original");
+  });
+
+  it("supports sync afterExecute", async () => {
+    const mockTool = createMockTool(async () => ({ output: "original" }));
+    const layer: ContextLayer = {
+      afterExecute: (_toolName, _params, result) => ({
+        ...result,
+        _sync: true,
+      }),
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    const result = await executeTool(wrapped, { input: "test" });
+    assertSuccess(result);
+    expect((result as Record<string, unknown>)._sync).toBe(true);
+  });
+
+  it("stops at first beforeExecute rejection", async () => {
+    const mockTool = createMockTool();
+    const layer2Spy = vi.fn();
+    const layer1: ContextLayer = {
+      beforeExecute: async () => ({ error: "layer 1 blocked" }),
+    };
+    const layer2: ContextLayer = {
+      beforeExecute: async () => {
+        layer2Spy();
+        return { error: "layer 2 blocked" };
+      },
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer1, layer2]);
+    const result = await executeTool(wrapped, { input: "test" });
+    assertError(result);
+    expect(result.error).toBe("layer 1 blocked");
+    expect(layer2Spy).not.toHaveBeenCalled();
+  });
+
+  it("allows execution when beforeExecute returns undefined", async () => {
+    const mockTool = createMockTool(async () => ({ output: "allowed" }));
+    const layer: ContextLayer = {
+      beforeExecute: async () => undefined,
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    const result = await executeTool(wrapped, { input: "test" });
+    assertSuccess(result);
+    expect((result as Record<string, unknown>).output).toBe("allowed");
+  });
+
+  it("chains afterExecute transforms in order", async () => {
+    const mockTool = createMockTool(async () => ({ value: 0 }));
+    const layer1: ContextLayer = {
+      afterExecute: async (_tn, _p, result) => ({
+        ...result,
+        step1: true,
+      }),
+    };
+    const layer2: ContextLayer = {
+      afterExecute: async (_tn, _p, result) => ({
+        ...result,
+        step2: true,
+        sawStep1: result.step1,
+      }),
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer1, layer2]);
+    const result = (await executeTool(wrapped, {
+      input: "test",
+    })) as Record<string, unknown>;
+    assertSuccess(result);
+    expect(result.step1).toBe(true);
+    expect(result.step2).toBe(true);
+    expect(result.sawStep1).toBe(true);
+  });
+
+  it("returns tool unchanged when it has no execute function", () => {
+    const noExecTool = {
+      description: "Tool without execute",
+      parameters: zodSchema(z.object({})),
+    } as unknown as ReturnType<typeof tool>;
+
+    const layer: ContextLayer = {
+      beforeExecute: async () => ({ error: "should not matter" }),
+    };
+    const wrapped = withContext(noExecTool, "NoExec", [layer]);
+    expect(wrapped).toBe(noExecTool); // same reference
+  });
+
+  it("passes correct toolName to layers", async () => {
+    const mockTool = createMockTool(async () => ({ output: "ok" }));
+    let receivedName: string | undefined;
+    const layer: ContextLayer = {
+      beforeExecute: async (toolName) => {
+        receivedName = toolName;
+        return undefined;
+      },
+    };
+    const wrapped = withContext(mockTool, "MyToolName", [layer]);
+    await executeTool(wrapped, { input: "test" });
+    expect(receivedName).toBe("MyToolName");
+  });
+
+  it("beforeExecute error in layer propagates as rejection", async () => {
+    const mockTool = createMockTool(async () => ({ output: "ok" }));
+    const layer: ContextLayer = {
+      beforeExecute: async () => {
+        throw new Error("layer exploded");
+      },
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    await expect(executeTool(wrapped, { input: "test" })).rejects.toThrow(
+      "layer exploded",
+    );
+  });
+
+  it("afterExecute error in layer propagates", async () => {
+    const mockTool = createMockTool(async () => ({ output: "ok" }));
+    const layer: ContextLayer = {
+      afterExecute: async () => {
+        throw new Error("transform exploded");
+      },
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    await expect(executeTool(wrapped, { input: "test" })).rejects.toThrow(
+      "transform exploded",
+    );
+  });
+
+  it("beforeExecute allows and afterExecute transforms on same layer", async () => {
+    const mockTool = createMockTool(async () => ({ output: "original" }));
+    const layer: ContextLayer = {
+      beforeExecute: async () => undefined, // allow
+      afterExecute: async (_tn, _p, result) => ({
+        ...result,
+        _combined: true,
+      }),
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    const result = (await executeTool(wrapped, {
+      input: "test",
+    })) as Record<string, unknown>;
+    assertSuccess(result);
+    expect(result.output).toBe("original");
+    expect(result._combined).toBe(true);
+  });
+
+  it("afterExecute does not run when beforeExecute rejects", async () => {
+    const afterSpy = vi.fn();
+    const mockTool = createMockTool(async () => ({ output: "ok" }));
+    const layer: ContextLayer = {
+      beforeExecute: async () => ({ error: "blocked" }),
+      afterExecute: async (_tn, _p, result) => {
+        afterSpy();
+        return result;
+      },
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    const result = await executeTool(wrapped, { input: "test" });
+    assertError(result);
+    expect(afterSpy).not.toHaveBeenCalled();
+  });
+
+  it("passes params to beforeExecute and afterExecute", async () => {
+    const mockTool = createMockTool(async () => ({ output: "ok" }));
+    let beforeParams: Record<string, unknown> | undefined;
+    let afterParams: Record<string, unknown> | undefined;
+    const layer: ContextLayer = {
+      beforeExecute: async (_tn, params) => {
+        beforeParams = params;
+        return undefined;
+      },
+      afterExecute: async (_tn, params, result) => {
+        afterParams = params;
+        return result;
+      },
+    };
+    const wrapped = withContext(mockTool, "Mock", [layer]);
+    await executeTool(wrapped, { input: "hello" });
+    expect(beforeParams?.input).toBe("hello");
+    expect(afterParams?.input).toBe("hello");
+  });
+});
+
+// ---------------------------------------------------------------------------
+// applyContextLayers()
+// ---------------------------------------------------------------------------
+
+describe("applyContextLayers", () => {
+  it("wraps all tools in the ToolSet", async () => {
+    const sandbox = createMockSandbox({
+      files: { "/workspace/file.ts": "content" },
+    });
+
+    const tools = {
+      Mock1: createMockTool(async () => ({ output: "one" })),
+      Mock2: createMockTool(async () => ({ output: "two" })),
+    };
+
+    const blockAll: ContextLayer = {
+      beforeExecute: async () => ({ error: "blocked" }),
+    };
+    const wrapped = applyContextLayers(tools, [blockAll]);
+
+    for (const [_name, wrappedTool] of Object.entries(wrapped)) {
+      const result = await executeTool(wrappedTool, { input: "test" });
+      assertError(result);
+      expect(result.error).toBe("blocked");
+    }
+  });
+
+  it("returns tools unchanged when layers array is empty", () => {
+    const tools = {
+      Mock: createMockTool(),
+    };
+    const result = applyContextLayers(tools, []);
+    expect(result).toBe(tools); // same reference
+  });
+
+  it("preserves tool keys in the returned ToolSet", async () => {
+    const tools = {
+      Alpha: createMockTool(async () => ({ output: "a" })),
+      Beta: createMockTool(async () => ({ output: "b" })),
+    };
+
+    const layer: ContextLayer = {
+      afterExecute: async (_tn, _p, result) => ({
+        ...result,
+        _wrapped: true,
+      }),
+    };
+
+    const wrapped = applyContextLayers(tools, [layer]);
+    expect(Object.keys(wrapped)).toEqual(["Alpha", "Beta"]);
+
+    const alphaResult = (await executeTool(wrapped.Alpha, {
+      input: "test",
+    })) as Record<string, unknown>;
+    assertSuccess(alphaResult);
+    expect(alphaResult._wrapped).toBe(true);
+    expect(alphaResult.output).toBe("a");
+  });
+
+  it("passes correct tool name per tool to layers", async () => {
+    const tools = {
+      Foo: createMockTool(async () => ({ output: "foo" })),
+      Bar: createMockTool(async () => ({ output: "bar" })),
+    };
+
+    const names: string[] = [];
+    const layer: ContextLayer = {
+      beforeExecute: async (toolName) => {
+        names.push(toolName);
+        return undefined;
+      },
+    };
+
+    const wrapped = applyContextLayers(tools, [layer]);
+    await executeTool(wrapped.Foo, { input: "test" });
+    await executeTool(wrapped.Bar, { input: "test" });
+    expect(names).toEqual(["Foo", "Bar"]);
+  });
+});
diff --git a/tests/tools/read.test.ts b/tests/tools/read.test.ts
index 7719df8..2d135c2 100644
--- a/tests/tools/read.test.ts
+++ b/tests/tools/read.test.ts
@@ -334,7 +334,10 @@ describe("Read Tool", () => {
   describe("total output truncation", () => {
     it("should truncate total output exceeding maxOutputLength", async () => {
       // Create a file with many lines that exceed 500 chars total
-      const fileLines = Array.from({ length: 50 }, (_, i) => `Line ${i + 1}: ${"a".repeat(20)}`);
+      const fileLines = Array.from(
+        { length: 50 },
+        (_, i) => `Line ${i + 1}: ${"a".repeat(20)}`,
+      );
       const fileContent = fileLines.join("\n");
       sandbox.setFile("/workspace/big.ts", fileContent);
 

From 5160f57a5109ddbccd2b7b4adac5bc84088a1591 Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Fri, 10 Apr 2026 15:27:49 -0400
Subject: [PATCH 02/11] Fix typecheck errors in prepare-step test assertions

Four spots used result.messages! without first narrowing result itself,
which PrepareStepFunction types as PrepareStepResult | undefined. The
surrounding expect(result?.messages).toBeDefined() doesn't propagate
narrowing to TypeScript. Added explicit non-null assertions on result.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 tests/context/prepare-step.test.ts | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/context/prepare-step.test.ts b/tests/context/prepare-step.test.ts
index 5c41484..bcf78ce 100644
--- a/tests/context/prepare-step.test.ts
+++ b/tests/context/prepare-step.test.ts
@@ -51,7 +51,7 @@ describe("createPrepareStep", () => {
     });
 
     expect(result?.messages).toBeDefined();
-    const lastMsg = result.messages![result.messages!.length - 1];
+    const lastMsg = result!.messages![result!.messages!.length - 1];
     expect(lastMsg.role).toBe("user");
     expect(lastMsg.content).toContain("PLAN MODE ACTIVE");
   });
@@ -143,7 +143,7 @@ describe("createPrepareStep", () => {
     const result = await prepareStep({ ...defaultArgs, messages });
 
     expect(result?.messages).toBeDefined();
-    const injected = result.messages!.slice(messages.length);
+    const injected = result!.messages!.slice(messages.length);
     const contents = injected.map((m) =>
       typeof m.content === "string" ? m.content : "",
     );
@@ -180,7 +180,7 @@ describe("createPrepareStep", () => {
     state.isActive = true;
     const r2 = await prepareStep({ ...defaultArgs, messages });
     expect(r2?.messages).toBeDefined();
-    const hasHint = r2.messages!.some(
+    const hasHint = r2!.messages!.some(
       (m) =>
         m.role === "user" &&
         typeof m.content === "string" &&

From c39039cdcfe4c89dbd981052723852702b52e607 Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Fri, 10 Apr 2026 15:28:33 -0400
Subject: [PATCH 03/11] Document exact script names in AGENTS.md

Clarifies that script names must be typed exactly (typecheck, not
type-check) and lists the full set of package.json script names. Adds a
one-liner for running all four pre-push CI gates locally.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 AGENTS.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/AGENTS.md b/AGENTS.md
index 917d6e8..6a12b20 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -72,8 +72,20 @@ bun run typecheck   # ALWAYS run before bun run build
 bun run build       # Bun bundles to dist/index.js + tsc emits .d.ts
 ```
 
+**Script names are exact.** It's `typecheck`, not `type-check` (though some names, such as `link-agents`, do contain a hyphen). Running the wrong name will just error with "Script not found". See `package.json` for the full list.
+
 **Critical**: `bun run build` does **not** fail on type errors during bundling. Run `bun run typecheck` first or type regressions will ship silently.
 
+### Full Pre-Push Check
+
+Before pushing, run all four gates locally — CI will reject otherwise:
+
+```bash
+bun run typecheck && bun run check && bun run test && bun run check:agents
+```
+
+Exact script names (from `package.json`): `typecheck`, `build`, `test`, `test:watch`, `test:coverage`, `format`, `format:check`, `lint`, `lint:check`, `check`, `check:ci`, `link-agents`, `check:agents`.
+
 ### Tests
 
 Use Vitest via `bun run test` — **not** `bun test` (which runs Bun's built-in runner and will miss our suite).

From f1912f69acf56d1dc3685dd940eebd92b4a8beae Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Sun, 12 Apr 2026 09:31:00 -0400
Subject: [PATCH 04/11] Fix output policy dropping original fields on
 truncation

The fallback branch in injectTruncatedOutput replaced the entire result
object with { _truncated, _hint }, silently erasing fields like { error }
and breaking the return-error-not-throw contract. Spread the original
result so all fields are preserved alongside truncation metadata.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/context/AGENTS.md               | 2 +-
 src/context/output-policy.ts        | 4 ++--
 tests/context/output-policy.test.ts | 7 ++++++-
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/context/AGENTS.md b/src/context/AGENTS.md
index 25135e5..538d89e 100644
--- a/src/context/AGENTS.md
+++ b/src/context/AGENTS.md
@@ -99,7 +99,7 @@ prepare-step.ts     ──→ ../utils/compact-conversation + ../utils/context-s
 2. If per-tool: add entry to `BUILT_IN_HINTS` in `output-policy.ts:58` or pass `hints`/`buildHint` via config
 3. If shape-specific: extend `extractText` in `output-policy.ts:127` to recognize new result shapes
 
-**Gotchas**: `excludeTools` skips truncation entirely. Truncated output is re-injected into the original field (`stdout` or `content`); for JSON-serialized results, it lands in `_truncated` + `_hint`.
+**Gotchas**: `excludeTools` skips truncation entirely. Truncated output is re-injected into the original field (`stdout` or `content`); for other structured results, original fields are preserved and `_truncated` + `_hint` are added alongside them.
 
 ### Add a new instruction source
 1. Extend `DiscoveredInstructions["sources"][].scope` union in `instructions.ts:19`
diff --git a/src/context/output-policy.ts b/src/context/output-policy.ts
index 747f7cc..d533477 100644
--- a/src/context/output-policy.ts
+++ b/src/context/output-policy.ts
@@ -149,8 +149,8 @@ function injectTruncatedOutput(
     return { ...result, content: truncated, _hint: hint };
   }
 
-  // For serialized JSON results, return the truncated string + hint
-  return { _truncated: truncated, _hint: hint };
+  // For other structured results, preserve original fields and add truncation metadata
+  return { ...result, _truncated: truncated, _hint: hint };
 }
 
 /**
diff --git a/tests/context/output-policy.test.ts b/tests/context/output-policy.test.ts
index 59d520a..803415d 100644
--- a/tests/context/output-policy.test.ts
+++ b/tests/context/output-policy.test.ts
@@ -246,6 +246,9 @@ describe("createOutputPolicy", () => {
     const transformed = await transform(layer, "Custom", {}, result);
     expect(transformed._truncated).toBeDefined();
     expect(transformed._hint).toBeDefined();
+    // Original fields are preserved alongside truncation metadata
+    expect(transformed.data).toBe(result.data);
+    expect(transformed.total).toBe(42);
   });
 
   // -----------------------------------------------------------------------
@@ -283,7 +286,7 @@ describe("createOutputPolicy", () => {
     expect(transformed).toEqual(result);
   });
 
-  it("large error results still get truncated", async () => {
+  it("large error results preserve error field after truncation", async () => {
     const layer = createOutputPolicy({
       redirectionThreshold: 10,
       maxOutputLength: 50,
@@ -292,6 +295,8 @@ describe("createOutputPolicy", () => {
     const result = { error: "x".repeat(100) };
     const transformed = await transform(layer, "Bash", {}, result);
     expect(transformed._hint).toBeDefined();
+    // The error field must survive so models can reason about the failure
+    expect(transformed.error).toBe(result.error);
   });
 
   // -----------------------------------------------------------------------

From 7ed0e555b98406b9eddbd5825092a08f7a997ba4 Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Sun, 12 Apr 2026 09:32:41 -0400
Subject: [PATCH 05/11] Quote stash directory path in mkdir to prevent shell
 injection

The stashOutput mkdir -p call interpolated the directory path unescaped
into a shell command, allowing spaces, $(), backticks, or semicolons in
pathFor() results to break or execute unintended fragments. Single-quote
the path with interior quote escaping.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/context/output-policy.ts        |  2 +-
 tests/context/output-policy.test.ts | 23 +++++++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/context/output-policy.ts b/src/context/output-policy.ts
index d533477..310985c 100644
--- a/src/context/output-policy.ts
+++ b/src/context/output-policy.ts
@@ -201,7 +201,7 @@ export function createOutputPolicy(config?: OutputPolicyConfig): ContextLayer {
         // Ensure directory exists (once)
         const dir = stashedPath.replace(/\/[^/]+$/, "");
         if (!stashDirCreated || dir !== stashDir) {
-          await stash.sandbox.exec(`mkdir -p ${dir}`);
+          await stash.sandbox.exec(`mkdir -p '${dir.replace(/'/g, "'\\''")}'`);
           if (dir === stashDir) stashDirCreated = true;
         }
 
diff --git a/tests/context/output-policy.test.ts b/tests/context/output-policy.test.ts
index 803415d..3aa0283 100644
--- a/tests/context/output-policy.test.ts
+++ b/tests/context/output-policy.test.ts
@@ -428,6 +428,29 @@ describe("createOutputPolicy", () => {
       expect(history.some((e) => e.command.includes("mkdir -p"))).toBe(true);
     });
 
+    it("quotes stash directory to prevent shell injection", async () => {
+      const sandbox = createMockSandbox();
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          dir: "/tmp/path with spaces/$(whoami)",
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      await transform(layer, "Bash", {}, result);
+
+      const history = sandbox.getExecHistory();
+      const mkdirCmd = history.find((e) => e.command.includes("mkdir -p"));
+      expect(mkdirCmd).toBeDefined();
+      // Path must be single-quoted so spaces and $() are not interpreted
+      expect(mkdirCmd!.command).toContain("'");
+      expect(mkdirCmd!.command).not.toMatch(/mkdir -p [^']/);
+    });
+
     it("does not write when output is below threshold", async () => {
       const sandbox = createMockSandbox();
       const layer = createOutputPolicy({

From 7c04795fb73ffdf93899ff73d1432fba4bd32f07 Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Sun, 12 Apr 2026 09:36:50 -0400
Subject: [PATCH 06/11] Decouple execution policy from plan mode

Make planModeState optional in createExecutionPolicy so shouldBlock
works standalone. Update createAgentTools wiring to install the layer
when either plan mode or a custom execution policy is configured.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/context/execution-policy.ts        |  4 ++--
 src/tools/index.ts                     |  4 ++--
 tests/context/execution-policy.test.ts | 20 ++++++++++++++++++++
 3 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/context/execution-policy.ts b/src/context/execution-policy.ts
index 8a40b95..fb81710 100644
--- a/src/context/execution-policy.ts
+++ b/src/context/execution-policy.ts
@@ -21,7 +21,7 @@ const DEFAULT_PLAN_MODE_BLOCKED = ["Bash", "Write", "Edit"];
  * All tools remain registered (prompt cache stable) — only execution is gated.
  */
 export function createExecutionPolicy(
-  planModeState: PlanModeState,
+  planModeState?: PlanModeState,
   config?: ExecutionPolicyConfig,
 ): ContextLayer {
   const blocked = new Set(
@@ -31,7 +31,7 @@ export function createExecutionPolicy(
   return {
     beforeExecute: (toolName, params) => {
       // Plan mode gate
-      if (planModeState.isActive && blocked.has(toolName)) {
+      if (planModeState?.isActive && blocked.has(toolName)) {
         return {
           error: `${toolName} is not available in plan mode. Use read-only tools (Read, Grep, Glob) to gather information, then call ExitPlanMode when your plan is ready.`,
         };
diff --git a/src/tools/index.ts b/src/tools/index.ts
index 5012533..c871cf7 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -250,8 +250,8 @@ export async function createAgentTools(
   const contextLayers: ContextLayer[] = [];
 
   if (config?.context) {
-    // Execution policy (only if planMode is enabled)
-    if (planModeState && config.context.executionPolicy) {
+    // Execution policy (plan-mode gating and/or custom shouldBlock)
+    if (planModeState || config.context.executionPolicy) {
       contextLayers.push(
         createExecutionPolicy(planModeState, config.context.executionPolicy),
       );
diff --git a/tests/context/execution-policy.test.ts b/tests/context/execution-policy.test.ts
index 969cd36..25ad19f 100644
--- a/tests/context/execution-policy.test.ts
+++ b/tests/context/execution-policy.test.ts
@@ -127,6 +127,26 @@ describe("createExecutionPolicy", () => {
     expect(shouldBlock).toHaveBeenCalledWith("Read", {});
   });
 
+  it("shouldBlock works without plan mode state", async () => {
+    const shouldBlock = vi.fn((toolName: string) => {
+      if (toolName === "Bash") return "Bash is disabled";
+      return undefined;
+    });
+
+    const layer = createExecutionPolicy(undefined, { shouldBlock });
+
+    expect(await gate(layer, "Bash")).toEqual({ error: "Bash is disabled" });
+    expect(await gate(layer, "Read")).toBeUndefined();
+  });
+
+  it("allows everything when no plan mode and no shouldBlock", async () => {
+    const layer = createExecutionPolicy();
+
+    for (const toolName of ["Bash", "Write", "Edit", "Read"]) {
+      expect(await gate(layer, toolName)).toBeUndefined();
+    }
+  });
+
   it("error message includes guidance about read-only tools", async () => {
     const state: PlanModeState = { isActive: true };
     const layer = createExecutionPolicy(state);

From 58df7e05d40db547fd51bcc0f6f2eafc634b3957 Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Sun, 12 Apr 2026 09:50:05 -0400
Subject: [PATCH 07/11] Degrade gracefully when stash output fails

Wrap the stash-to-disk path in try/catch so mkdir or writeFile failures
log a warning instead of converting a successful tool result into an
unhandled exception.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/context/output-policy.ts        | 31 +++++++++------
 tests/context/output-policy.test.ts | 60 +++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/src/context/output-policy.ts b/src/context/output-policy.ts
index 310985c..30881b9 100644
--- a/src/context/output-policy.ts
+++ b/src/context/output-policy.ts
@@ -193,19 +193,26 @@ export function createOutputPolicy(config?: OutputPolicyConfig): ContextLayer {
       // Stash full output to disk if configured for this tool
       let stashedPath: string | undefined;
       if (stash && stashTools.has(toolName)) {
-        // Determine file path
-        stashedPath =
-          stash.pathFor?.(toolName, params, result) ??
-          defaultStashPath(stashDir, toolName);
-
-        // Ensure directory exists (once)
-        const dir = stashedPath.replace(/\/[^/]+$/, "");
-        if (!stashDirCreated || dir !== stashDir) {
-          await stash.sandbox.exec(`mkdir -p '${dir.replace(/'/g, "'\\''")}'`);
-          if (dir === stashDir) stashDirCreated = true;
+        try {
+          // Determine file path
+          stashedPath =
+            stash.pathFor?.(toolName, params, result) ??
+            defaultStashPath(stashDir, toolName);
+
+          // Ensure directory exists (once)
+          const dir = stashedPath.replace(/\/[^/]+$/, "");
+          if (!stashDirCreated || dir !== stashDir) {
+            await stash.sandbox.exec(
+              `mkdir -p '${dir.replace(/'/g, "'\\''")}'`,
+            );
+            if (dir === stashDir) stashDirCreated = true;
+          }
+
+          await stash.sandbox.writeFile(stashedPath, text);
+        } catch (err) {
+          console.warn(`[bashkit] stashOutput failed: ${err}`);
+          stashedPath = undefined;
         }
-
-        await stash.sandbox.writeFile(stashedPath, text);
       }
 
       // Truncate
diff --git a/tests/context/output-policy.test.ts b/tests/context/output-policy.test.ts
index 3aa0283..44c5448 100644
--- a/tests/context/output-policy.test.ts
+++ b/tests/context/output-policy.test.ts
@@ -541,5 +541,65 @@ describe("createOutputPolicy", () => {
       );
       expect(stashPath).toBeDefined();
     });
+
+    it("degrades gracefully when mkdir fails", async () => {
+      const sandbox = createMockSandbox({
+        execHandler: async () => {
+          throw new Error("Permission denied");
+        },
+      });
+      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      const transformed = await transform(layer, "Bash", {}, result);
+
+      // Still returns a truncated result instead of throwing
+      expect(typeof transformed.stdout).toBe("string");
+      expect(transformed._hint).toBeDefined();
+      // Hint should not reference a stash file
+      expect((transformed._hint as string)).not.toContain("Full output saved to");
+      // Warning was logged
+      expect(warnSpy).toHaveBeenCalledWith(
+        expect.stringContaining("[bashkit] stashOutput failed"),
+      );
+
+      warnSpy.mockRestore();
+    });
+
+    it("degrades gracefully when writeFile fails", async () => {
+      const sandbox = createMockSandbox();
+      vi.spyOn(sandbox, "writeFile").mockRejectedValue(new Error("Disk full"));
+      const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+
+      const layer = createOutputPolicy({
+        redirectionThreshold: 10,
+        maxOutputLength: 50,
+        stashOutput: {
+          sandbox,
+          tools: ["Bash"],
+        },
+      });
+
+      const result = { stdout: "x".repeat(100) };
+      const transformed = await transform(layer, "Bash", {}, result);
+
+      expect(typeof transformed.stdout).toBe("string");
+      expect(transformed._hint).toBeDefined();
+      expect((transformed._hint as string)).not.toContain("Full output saved to");
+      expect(warnSpy).toHaveBeenCalledWith(
+        expect.stringContaining("[bashkit] stashOutput failed"),
+      );
+
+      warnSpy.mockRestore();
+    });
   });
 });

From a905e77db1388b6fe1d2c76213db43473e3e65f5 Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Sun, 12 Apr 2026 09:54:41 -0400
Subject: [PATCH 08/11] Clarify that instructions and environment are opt-in

The JSDoc comments implied default-on behavior but the implementation
disables both when omitted. Update comments to say so explicitly.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/context/build-context.ts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/context/build-context.ts b/src/context/build-context.ts
index 06c6256..c505b02 100644
--- a/src/context/build-context.ts
+++ b/src/context/build-context.ts
@@ -13,9 +13,9 @@ import {
 import { buildToolGuidance, type ToolGuidanceConfig } from "./tool-guidance";
 
 export interface SystemContextConfig {
-  /** Instruction file discovery. true for defaults, or provide config. */
+  /** Instruction file discovery. Pass true for defaults, or provide config. Disabled when omitted. */
   instructions?: boolean | InstructionDiscoveryConfig;
-  /** Environment context. true for defaults, or provide config. */
+  /** Environment context. Pass true for defaults, or provide config. Disabled when omitted. */
   environment?: boolean | EnvironmentContextConfig;
   /** Tool guidance config */
   toolGuidance?: ToolGuidanceConfig;

From 17836d53ae45de1e01edf6ff5cc628ebb6349e3f Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Sun, 12 Apr 2026 09:57:14 -0400
Subject: [PATCH 09/11] Forward custom environment fields through
 buildSystemContext

buildSystemContext called formatEnvironment without passing the custom
fields from EnvironmentContextConfig, silently dropping them from the
prompt output.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/context/build-context.ts        |  6 +++++-
 tests/context/build-context.test.ts | 14 ++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/src/context/build-context.ts b/src/context/build-context.ts
index c505b02..db9b0e2 100644
--- a/src/context/build-context.ts
+++ b/src/context/build-context.ts
@@ -71,7 +71,11 @@ export async function buildSystemContext(
     ? `# Project Instructions\n<INSTRUCTIONS>\n${instructions.text}\n</INSTRUCTIONS>`
     : null;
 
-  const environmentText = env ? formatEnvironment(env) : null;
+  const envCustom =
+    config?.environment && typeof config.environment === "object"
+      ? config.environment.custom
+      : undefined;
+  const environmentText = env ? formatEnvironment(env, envCustom) : null;
 
   const toolGuidanceText = config?.toolGuidance
     ? buildToolGuidance(config.toolGuidance)
diff --git a/tests/context/build-context.test.ts b/tests/context/build-context.test.ts
index 4cbe43b..2f7c846 100644
--- a/tests/context/build-context.test.ts
+++ b/tests/context/build-context.test.ts
@@ -426,6 +426,20 @@ describe("buildSystemContext", () => {
     expect(ctx.meta.environmentContext!.cwd).toBe("/project");
   });
 
+  it("forwards custom environment fields into formatted output", async () => {
+    const sandbox = createMockSandbox();
+    Object.defineProperty(sandbox, "workingDirectory", {
+      value: "/project",
+    });
+
+    const ctx = await buildSystemContext(sandbox, {
+      environment: { custom: { app_version: "1.2.3", region: "us-east-1" } },
+    });
+
+    expect(ctx.environment).toContain("<app_version>1.2.3</app_version>");
+    expect(ctx.environment).toContain("<region>us-east-1</region>");
+  });
+
   it("returns empty combined when no sections configured", async () => {
     const sandbox = createMockSandbox();
     const ctx = await buildSystemContext(sandbox);

From be2c340861c78f05bc3134cf55faacb830072b59 Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Sun, 12 Apr 2026 10:08:37 -0400
Subject: [PATCH 10/11] Add context layer docs page and bump version to 0.7.0

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 docs/src/app/MobileNav.tsx           |   1 +
 docs/src/app/SideNav.tsx             |  13 ++
 docs/src/app/api-reference/page.tsx  |  10 +
 docs/src/app/components/NavIcons.tsx |   7 +
 docs/src/app/context/page.tsx        | 333 +++++++++++++++++++++++++++
 package.json                         |   2 +-
 6 files changed, 365 insertions(+), 1 deletion(-)
 create mode 100644 docs/src/app/context/page.tsx

diff --git a/docs/src/app/MobileNav.tsx b/docs/src/app/MobileNav.tsx
index db374e9..9601b18 100644
--- a/docs/src/app/MobileNav.tsx
+++ b/docs/src/app/MobileNav.tsx
@@ -11,6 +11,7 @@ const links = [
   { href: "/getting-started", label: "Getting Started" },
   { href: "/tools", label: "Tools" },
   { href: "/sandboxes", label: "Sandboxes" },
+  { href: "/context", label: "Context" },
   { href: "/api-reference", label: "API Reference" },
 ];
 
diff --git a/docs/src/app/SideNav.tsx b/docs/src/app/SideNav.tsx
index 4569c5b..28a7807 100644
--- a/docs/src/app/SideNav.tsx
+++ b/docs/src/app/SideNav.tsx
@@ -63,6 +63,19 @@ const links: {
       { id: "e2b-sandbox", text: "E2BSandbox" },
     ],
   },
+  {
+    href: "/context",
+    label: "Context",
+    items: [
+      { id: "overview", text: "Overview" },
+      { id: "context-layers", text: "Context Layers" },
+      { id: "execution-policy", text: "Execution Policy" },
+      { id: "output-policy", text: "Output Policy" },
+      { id: "system-prompt", text: "System Prompt" },
+      { id: "prepare-step", text: "prepareStep" },
+      { id: "full-example", text: "Full Example" },
+    ],
+  },
   {
     href: "/api-reference",
     label: "API Reference",
diff --git a/docs/src/app/api-reference/page.tsx b/docs/src/app/api-reference/page.tsx
index 968e316..40f0ee5 100644
--- a/docs/src/app/api-reference/page.tsx
+++ b/docs/src/app/api-reference/page.tsx
@@ -74,6 +74,11 @@ const { tools, budget } = await createAgentTools(sandbox, {
               <code>openRouterModels</code> &mdash; Model registry map (when
               modelRegistry config provided)
             </li>
+            <li>
+              <code>contextLayers</code> &mdash; Applied context layers (empty
+              array when no context config). Use with{" "}
+              <code>applyContextLayers()</code> for late-added tools.
+            </li>
           </ul>
         </section>
 
@@ -104,6 +109,11 @@ const { tools, budget } = await createAgentTools(sandbox, {
               Budget tracking configuration. Requires modelRegistry or
               pricingProvider.
             </Prop>
+            <Prop name="context" type="ContextConfig">
+              Context layer config. Opt-in &mdash; wraps tools with execution
+              and output policies. See the{" "}
+              <a href="/context">Context</a> page.
+            </Prop>
           </div>
 
           <h3>ToolConfig (per-tool)</h3>
diff --git a/docs/src/app/components/NavIcons.tsx b/docs/src/app/components/NavIcons.tsx
index 4e443b6..27282cf 100644
--- a/docs/src/app/components/NavIcons.tsx
+++ b/docs/src/app/components/NavIcons.tsx
@@ -38,6 +38,13 @@ export const navIcons: Record<string, React.ReactNode> = {
       <line x1="12" y1="22.08" x2="12" y2="12" />
     </svg>
   ),
+  "/context": (
+    <svg {...iconProps}>
+      <polygon points="12 2 2 7 12 12 22 7 12 2" />
+      <polyline points="2 17 12 22 22 17" />
+      <polyline points="2 12 12 17 22 12" />
+    </svg>
+  ),
   "/api-reference": (
     <svg {...iconProps}>
       <polyline points="16 18 22 12 16 6" />
diff --git a/docs/src/app/context/page.tsx b/docs/src/app/context/page.tsx
new file mode 100644
index 0000000..fe0bb60
--- /dev/null
+++ b/docs/src/app/context/page.tsx
@@ -0,0 +1,333 @@
+"use client";
+
+import { CodeBlock } from "../components/CodeBlock";
+import { Footer } from "../Footer";
+
+export default function Context() {
+  return (
+    <>
+      <article className="article">
+        <header>
+          <h1>Context</h1>
+          <p className="tagline">
+            System prompt assembly, tool execution gating, and output policies.
+          </p>
+        </header>
+
+        <section>
+          <h2 id="overview">Overview</h2>
+          <p>
+            The context layer is an opt-in system that wraps your tools with
+            cross-cutting behavior: blocking tools based on state (execution
+            policy), truncating large outputs with redirection hints (output
+            policy), and assembling a static system prompt from project docs and
+            environment info.
+          </p>
+          <p>
+            Enable it by passing a <code>context</code> config to{" "}
+            <code>createAgentTools</code>:
+          </p>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`const { tools, contextLayers } = await createAgentTools(sandbox, {
+  context: {
+    executionPolicy: { /* ... */ },
+    outputPolicy: { maxOutputLength: 50000 },
+    layers: [myCustomLayer],
+    extraTools: { MyTool: myTool },
+  },
+});`}
+          />
+          <p>
+            When <code>context</code> is omitted, tools work exactly as before
+            &mdash; no wrapping, no overhead.
+          </p>
+        </section>
+
+        <section>
+          <h2 id="context-layers">Context Layers</h2>
+          <p>
+            A <code>ContextLayer</code> intercepts tool execution with two
+            optional hooks:
+          </p>
+          <ul>
+            <li>
+              <code>beforeExecute</code> &mdash; return{" "}
+              <code>{`{ error: string }`}</code> to block a tool call, or{" "}
+              <code>undefined</code> to allow it
+            </li>
+            <li>
+              <code>afterExecute</code> &mdash; transform the tool result (e.g.,
+              truncate output)
+            </li>
+          </ul>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`import { withContext, applyContextLayers } from 'bashkit';
+import type { ContextLayer } from 'bashkit';
+
+const loggingLayer: ContextLayer = {
+  beforeExecute: (toolName, params) => {
+    console.log(\`Calling \${toolName}\`);
+    return undefined; // allow execution
+  },
+  afterExecute: (toolName, params, result) => {
+    console.log(\`\${toolName} returned\`);
+    return result; // pass through unchanged
+  },
+};
+
+// Wrap a single tool
+const wrappedTool = withContext(myTool, 'MyTool', [loggingLayer]);
+
+// Wrap an entire ToolSet
+const wrappedTools = applyContextLayers(tools, [loggingLayer]);`}
+          />
+          <p>
+            Layers compose in order: first <code>beforeExecute</code> rejection
+            wins, <code>afterExecute</code> transforms pipe through
+            sequentially.
+          </p>
+        </section>
+
+        <section>
+          <h2 id="execution-policy">Execution Policy</h2>
+          <p>
+            Gates tool execution based on state. The most common use case is
+            plan mode &mdash; blocking write tools while allowing read-only
+            tools.
+          </p>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`import { createExecutionPolicy } from 'bashkit';
+
+// Plan mode: blocks Bash, Write, Edit by default
+const policy = createExecutionPolicy(planModeState);
+
+// Custom blocked tools
+const policy = createExecutionPolicy(planModeState, {
+  planModeBlockedTools: ['Bash', 'Write', 'Edit', 'WebFetch'],
+});
+
+// Custom predicate (independent of plan mode)
+const policy = createExecutionPolicy(undefined, {
+  shouldBlock: (toolName, params) => {
+    if (toolName === 'Bash' && String(params.command).includes('rm -rf')) {
+      return 'Destructive commands are not allowed';
+    }
+    return undefined;
+  },
+});`}
+          />
+          <p>
+            Tools stay registered in the tool set (prompt cache stable) &mdash;
+            only execution is gated.
+          </p>
+        </section>
+
+        <section>
+          <h2 id="output-policy">Output Policy</h2>
+          <p>
+            Handles large tool outputs by truncating and injecting hints that
+            tell the model how to access the full result.
+          </p>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`import { createOutputPolicy } from 'bashkit';
+
+// Defaults: maxOutputLength 30000, redirectionThreshold 20000
+const policy = createOutputPolicy();
+
+// Custom thresholds
+const policy = createOutputPolicy({
+  maxOutputLength: 50000,
+  redirectionThreshold: 40000,
+  excludeTools: ['Read'],  // never truncate Read output
+});`}
+          />
+          <p>
+            When output exceeds <code>redirectionThreshold</code>, it gets
+            truncated to <code>maxOutputLength</code> and a{" "}
+            <code>_hint</code> field is added with tool-specific guidance (e.g.,
+            &quot;use <code>head</code>/<code>tail</code> to see specific
+            parts&quot;).
+          </p>
+
+          <h3>Custom Hints</h3>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`const policy = createOutputPolicy({
+  // Simple per-tool hint strings
+  hints: {
+    Bash: 'Re-run with | head or | tail to see specific parts.',
+    Grep: 'Narrow your pattern to reduce results.',
+  },
+  // Full control callback
+  buildHint: (toolName, params, originalLength, result) => {
+    if (toolName === 'Bash' && params.command === 'git log') {
+      return 'Use git log with --oneline or -n to limit output.';
+    }
+    return undefined; // fall through to hints map / defaults
+  },
+});`}
+          />
+
+          <h3>Stash to Disk</h3>
+          <p>
+            Optionally save full output to disk before truncating, so the model
+            can <code>Read</code> the file later:
+          </p>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`const policy = createOutputPolicy({
+  stashOutput: {
+    sandbox,
+    tools: ['Bash', 'Grep'],  // which tools get disk stash
+    dir: '/tmp/.bashkit/tool-output',  // default
+  },
+});`}
+          />
+        </section>
+
+        <section>
+          <h2 id="system-prompt">System Prompt Assembly</h2>
+          <p>
+            <code>buildSystemContext</code> assembles a static system prompt from
+            three sources: discovered project instructions (AGENTS.md /
+            CLAUDE.md files), environment info (cwd, shell, platform, git), and
+            tool guidance.
+          </p>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`import { buildSystemContext } from 'bashkit';
+
+const ctx = await buildSystemContext(sandbox, {
+  instructions: true,  // discover AGENTS.md / CLAUDE.md files
+  environment: true,   // collect cwd, shell, platform, git info
+  toolGuidance: {
+    tools: { Bash: 'Run shell commands', Read: 'Read files' },
+  },
+});
+
+// Use in streamText
+const result = await streamText({
+  model,
+  system: ctx.combined,  // all sections joined
+  tools,
+  messages,
+});
+
+// Or access individual sections
+ctx.instructions  // project instructions text
+ctx.environment   // environment XML block
+ctx.toolGuidance  // tool hint list`}
+          />
+          <p>
+            Call once at init &mdash; the output is deterministic and designed to
+            stay stable across turns for Anthropic prompt caching.
+          </p>
+        </section>
+
+        <section>
+          <h2 id="prepare-step">prepareStep</h2>
+          <p>
+            <code>createPrepareStep</code> returns a callback for the AI
+            SDK&apos;s <code>prepareStep</code> option. It composes message
+            compaction, context status monitoring, and plan mode hints.
+          </p>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`import { createPrepareStep } from 'bashkit';
+
+const prepareStep = createPrepareStep({
+  compaction: {
+    model,
+    maxTokens: 128000,
+    threshold: 0.7,
+  },
+  contextStatus: {
+    maxTokens: 128000,
+  },
+  planModeState,
+  extend: async (args) => {
+    // Custom logic runs after built-in steps
+    return {};
+  },
+});
+
+const result = await streamText({
+  model,
+  tools,
+  messages,
+  prepareStep,
+});`}
+          />
+          <p>
+            <strong>Important:</strong> <code>prepareStep</code> never touches
+            the <code>system</code> prompt &mdash; all dynamic content is
+            injected as user messages to preserve prompt caching.
+          </p>
+        </section>
+
+        <section>
+          <h2 id="full-example">Full Example</h2>
+          <CodeBlock
+            language="typescript"
+            copyable
+            code={`import {
+  createLocalSandbox,
+  createAgentTools,
+  buildSystemContext,
+  createPrepareStep,
+} from 'bashkit';
+import { streamText } from 'ai';
+
+const sandbox = createLocalSandbox({ workingDirectory: '.' });
+
+const { tools, planModeState, contextLayers } = await createAgentTools(
+  sandbox,
+  {
+    context: {
+      executionPolicy: {},       // default blocked tools; gate is added only when planModeState exists
+      outputPolicy: {
+        maxOutputLength: 50000,
+        stashOutput: { sandbox, tools: ['Bash'] },
+      },
+    },
+    budget: { maxUsd: 5.0 },
+  },
+);
+
+const ctx = await buildSystemContext(sandbox, {
+  instructions: true,
+  environment: true,
+});
+
+const prepareStep = createPrepareStep({
+  planModeState,
+  contextStatus: { maxTokens: 128000 },
+});
+
+const result = await streamText({
+  model,
+  system: ctx.combined,
+  tools,
+  messages,
+  prepareStep,
+});`}
+          />
+        </section>
+      </article>
+
+      <Footer />
+    </>
+  );
+}
diff --git a/package.json b/package.json
index 3d3c98a..cee5bc7 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "bashkit",
-  "version": "0.6.0",
+  "version": "0.7.0",
   "description": "Agentic coding tools for the Vercel AI SDK",
   "type": "module",
   "main": "./dist/index.js",

From 00c13b3bc849ed3a4fe29dd313fc07f62f09916e Mon Sep 17 00:00:00 2001
From: jbreite <josh@joinpogo.com>
Date: Sun, 12 Apr 2026 10:13:16 -0400
Subject: [PATCH 11/11] Skip execution policy layer when no planModeState or
 shouldBlock

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/tools/index.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/tools/index.ts b/src/tools/index.ts
index c871cf7..694f5e5 100644
--- a/src/tools/index.ts
+++ b/src/tools/index.ts
@@ -251,7 +251,7 @@ export async function createAgentTools(
 
   if (config?.context) {
     // Execution policy (plan-mode gating and/or custom shouldBlock)
-    if (planModeState || config.context.executionPolicy) {
+    if (planModeState || config.context.executionPolicy?.shouldBlock) {
       contextLayers.push(
         createExecutionPolicy(planModeState, config.context.executionPolicy),
       );