From 40f018b59d478455c85b816920f115985b8faac5 Mon Sep 17 00:00:00 2001 From: Bojan Date: Tue, 21 Apr 2026 15:17:13 +0200 Subject: [PATCH 01/21] Add agent-scope: task-scoped write permissions for AI agents --- .cursor/hooks.json | 34 ++ .cursor/hooks/scope-guard.mjs | 142 ++++++++ .cursor/hooks/session-start.mjs | 128 +++++++ .cursor/hooks/shell-diff-check.mjs | 132 +++++++ .cursor/hooks/shell-precheck.mjs | 182 ++++++++++ .cursor/rules/agent-scope.mdc | 241 ++++++++++++ .gitignore | 6 + CLAUDE.md | 114 ++++++ agent-scope/README.md | 366 +++++++++++++++++++ agent-scope/bin/install-git-hooks.sh | 53 +++ agent-scope/bin/scope-check.mjs | 64 ++++ agent-scope/bin/task.mjs | 426 ++++++++++++++++++++++ agent-scope/hooks/pre-commit | 52 +++ agent-scope/lib/denial.mjs | 409 +++++++++++++++++++++ agent-scope/lib/denial.test.mjs | 525 +++++++++++++++++++++++++++ agent-scope/lib/log.mjs | 89 +++++ agent-scope/lib/scope.mjs | 415 +++++++++++++++++++++ agent-scope/lib/scope.test.mjs | 427 ++++++++++++++++++++++ agent-scope/lib/shell-parse.mjs | 185 ++++++++++ agent-scope/lib/shell-parse.test.mjs | 248 +++++++++++++ agent-scope/schema/task.schema.json | 48 +++ agent-scope/tasks/base.json | 20 + agent-scope/tasks/staking.json | 20 + agent-scope/tasks/sync-refactor.json | 12 + package.json | 9 +- 25 files changed, 4346 insertions(+), 1 deletion(-) create mode 100644 .cursor/hooks.json create mode 100755 .cursor/hooks/scope-guard.mjs create mode 100755 .cursor/hooks/session-start.mjs create mode 100755 .cursor/hooks/shell-diff-check.mjs create mode 100755 .cursor/hooks/shell-precheck.mjs create mode 100644 .cursor/rules/agent-scope.mdc create mode 100644 agent-scope/README.md create mode 100755 agent-scope/bin/install-git-hooks.sh create mode 100755 agent-scope/bin/scope-check.mjs create mode 100755 agent-scope/bin/task.mjs create mode 100755 agent-scope/hooks/pre-commit create mode 100644 agent-scope/lib/denial.mjs create mode 100644 agent-scope/lib/denial.test.mjs create mode 100644 agent-scope/lib/log.mjs create mode 100644 agent-scope/lib/scope.mjs create mode 100644 agent-scope/lib/scope.test.mjs create mode 100644 agent-scope/lib/shell-parse.mjs create mode 100644 agent-scope/lib/shell-parse.test.mjs create mode 100644 agent-scope/schema/task.schema.json create mode 100644 agent-scope/tasks/base.json create mode 100644 agent-scope/tasks/staking.json create mode 100644 agent-scope/tasks/sync-refactor.json diff --git a/.cursor/hooks.json b/.cursor/hooks.json new file mode 100644 index 000000000..a2c9305f5 --- /dev/null +++ b/.cursor/hooks.json @@ -0,0 +1,34 @@ +{ + "version": 1, + "hooks": { + "sessionStart": [ + { + "command": ".cursor/hooks/session-start.mjs", + "failClosed": false, + "timeout": 5 + } + ], + "preToolUse": [ + { + "command": ".cursor/hooks/scope-guard.mjs", + "matcher": "Write|StrReplace|Delete|EditNotebook|MultiEdit|Edit", + "failClosed": true, + "timeout": 5 + } + ], + "beforeShellExecution": [ + { + "command": ".cursor/hooks/shell-precheck.mjs", + "failClosed": false, + "timeout": 5 + } + ], + "afterShellExecution": [ + { + "command": ".cursor/hooks/shell-diff-check.mjs", + "failClosed": false, + "timeout": 10 + } + ] + } +} diff --git a/.cursor/hooks/scope-guard.mjs b/.cursor/hooks/scope-guard.mjs new file mode 100755 index 000000000..0701e3283 --- /dev/null +++ b/.cursor/hooks/scope-guard.mjs @@ -0,0 +1,142 @@ +#!/usr/bin/env node +// Cursor preToolUse hook. Blocks writes to paths outside the active task's scope, +// and always-deny to hardcoded protected system files. + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; +const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; +const { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, + normalizeToRepoPath, checkNodeVersion, checkProtected, +} = await import(scopeUrl); +const { logDenial, logDecision } = await import(logUrl); +const { + buildPreToolUseDenial, buildLoadErrorDenial, +} = await import(denialUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write(JSON.stringify({ permission: 'allow' })); + process.exit(0); +} + +function allow() { + process.stdout.write(JSON.stringify({ permission: 'allow' })); + process.exit(0); +} + +function deny(msg) { + process.stdout.write(JSON.stringify({ + permission: 'deny', + agent_message: msg, + user_message: 'agent-scope blocked an out-of-task write — see agent_message for the plan-mode menu.', + })); + process.exit(0); +} + +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +function extractTargetPath(toolInput) { + if (!toolInput || typeof toolInput !== 'object') return null; + return ( + toolInput.path || + toolInput.target_file || + toolInput.file_path || + toolInput.filepath || + toolInput.notebook_path || + toolInput.target_notebook || + null + ); +} + +async function main() { + const raw = readStdin(); + if (!raw) return allow(); + + let payload; + try { payload = JSON.parse(raw); } catch { return allow(); } + + const toolName = payload.tool_name || payload.toolName || payload.tool || ''; + const toolInput = payload.tool_input || payload.toolInput || payload.input || {}; + const sessionId = payload.session_id || payload.sessionId || null; + + const GUARDED = /^(Write|StrReplace|Delete|EditNotebook|MultiEdit|Edit)$/; + if (!GUARDED.test(toolName)) return allow(); + + const targetPath = extractTargetPath(toolInput); + if (!targetPath) return allow(); + + const root = resolveRepoRoot(); + const rel = normalizeToRepoPath(root, targetPath); + + // Protected-path check runs even without an active task. + if (checkProtected(rel, root) === 'deny') { + const { id: tid } = resolveActiveTaskId(root); + logDenial(root, { + event: 'preToolUse.protected', + tool: toolName, + path: rel, + task: tid, + sessionId, + }); + const { message } = buildPreToolUseDenial({ + tool: toolName, deniedPath: rel, decision: 'protected', + task: null, taskId: tid, root, + }); + return deny(message); + } + + const { id: taskId, source: taskSource } = resolveActiveTaskId(root); + if (!taskId) return allow(); + + let task; + try { task = loadTask(root, taskId); } + catch (e) { + const { message } = buildLoadErrorDenial({ taskId, error: e.message }); + return deny(message); + } + + const decision = checkPath(task, rel, root); + + logDecision(root, { + event: 'preToolUse', + tool: toolName, + decision, + path: rel, + task: taskId, + taskSource, + sessionId, + }); + + if (decision === 'allow' || decision === 'exempt') return allow(); + + logDenial(root, { + event: 'preToolUse.deny', + tool: toolName, + path: rel, + decision, + task: taskId, + taskSource, + sessionId, + }); + + const { message } = buildPreToolUseDenial({ + tool: toolName, deniedPath: rel, decision, + task, taskId, root, + }); + return deny(message); +} + +main().catch(err => { + process.stderr.write(`scope-guard hook error: ${err?.message || err}\n`); + allow(); +}); diff --git a/.cursor/hooks/session-start.mjs b/.cursor/hooks/session-start.mjs new file mode 100755 index 000000000..2f7a081a1 --- /dev/null +++ b/.cursor/hooks/session-start.mjs @@ -0,0 +1,128 @@ +#!/usr/bin/env node +// Cursor sessionStart hook. Injects the active task's scope into the agent's +// initial context so the agent knows what it may modify without having to +// hit a deny first. Also surfaces bootstrap-mode status. + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkNodeVersion, isBootstrapActive, +} = await import(scopeUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(context) { + if (!context) { process.stdout.write('{}'); process.exit(0); } + process.stdout.write(JSON.stringify({ additional_context: context })); + process.exit(0); +} + +function readStdin() { + try { readFileSync(0, 'utf8'); } catch { /* ignore */ } +} + +async function main() { + readStdin(); + const root = resolveRepoRoot(); + const { id: taskId, source } = resolveActiveTaskId(root); + const bootstrap = isBootstrapActive(root); + + const header = []; + if (bootstrap) { + header.push( + '# agent-scope: BOOTSTRAP MODE ACTIVE', + '', + 'Hardcoded path protection is currently DISABLED because a human has enabled', + 'bootstrap mode (token file or env var). Writes to system files are permitted.', + '', + 'If you are not explicitly working on improving agent-scope itself, ask the', + 'user to disable bootstrap mode before proceeding:', + ' rm agent-scope/.bootstrap-token', + '', + ); + } + + if (!taskId) { + // No task + no bootstrap → the system is fully invisible. The agent + // behaves like agent-scope doesn't exist. Protected paths still guard + // themselves via preToolUse/beforeShell, but that only fires if the + // agent actually tries to touch them, so there's no need to announce + // anything up front. If bootstrap is on, do surface the warning. + if (!bootstrap) return emit(null); + return emit(header.concat([ + '# agent-scope: no active task', + '', + 'Bootstrap is active but no task is set. System files are currently', + 'writable. When you finish the protected work, remove the token:', + ' rm agent-scope/.bootstrap-token', + ]).join('\n')); + } + + let task; + try { task = loadTask(root, taskId); } + catch (e) { + return emit(header.concat([ + `# agent-scope: ACTIVE TASK MANIFEST BROKEN (${taskId})`, + '', + `The manifest at agent-scope/tasks/${taskId}.json failed to load:`, + ` ${e.message}`, + '', + 'All writes will be denied until this is fixed. STOP and report this to the user.', + ]).join('\n')); + } + + const allowedPositive = (task.allowed || []).filter(p => !p.startsWith('!')); + const allowedNegative = (task.allowed || []).filter(p => p.startsWith('!')); + const exemptionsPositive = (task.exemptions || []).filter(p => !p.startsWith('!')); + const exemptionsNegative = (task.exemptions || []).filter(p => p.startsWith('!')); + + const lines = header.concat([ + `# agent-scope: active task — ${task.id}`, + '', + `**Description:** ${task.description || '(none)'}`, + task.owner ? `**Owner:** ${task.owner}` : null, + `**Resolved from:** ${source}`, + task.__inheritedFrom && task.__inheritedFrom.length ? `**Inherits from:** ${task.__inheritedFrom.join(', ')}` : null, + '', + '## You may modify files matching:', + ...(allowedPositive.length ? allowedPositive.map(p => `- \`${p}\``) : ['- (nothing)']), + ]); + if (exemptionsPositive.length) { + lines.push('', '## Always allowed (build artifacts, lockfiles):'); + for (const p of exemptionsPositive) lines.push(`- \`${p}\``); + } + if (allowedNegative.length || exemptionsNegative.length) { + lines.push('', '## Explicitly denied (even if they look in-scope):'); + for (const p of [...allowedNegative, ...exemptionsNegative]) lines.push(`- \`${p}\``); + } + if (task.notes) { + lines.push('', '## Task notes', task.notes); + } + lines.push( + '', + '## Rules', + '- You may **read** any file in the repo.', + '- You may **write** only files matching the patterns above.', + '- System files (`.cursor/hooks/**`, `agent-scope/lib/**`, etc.) are hardcode-protected regardless of task.' + (bootstrap ? ' (currently bypassed by bootstrap mode)' : ''), + '- If you believe an out-of-scope file must be changed for this task, STOP and ask the user for explicit approval. The user will grant approval by editing the manifest.', + '- A Cursor hook enforces this on every Write/Edit/Delete. A pre-shell hook blocks destructive shell commands on denied paths. A post-shell hook reverts anything that slipped through. Pre-commit and CI also block out-of-scope commits.', + '- To clear or switch tasks, ask the user — do not edit `agent-scope/active` yourself.', + ); + + emit(lines.filter(l => l !== null).join('\n')); +} + +main().catch(err => { + process.stderr.write(`session-start hook error: ${err?.message || err}\n`); + emit(null); +}); diff --git a/.cursor/hooks/shell-diff-check.mjs b/.cursor/hooks/shell-diff-check.mjs new file mode 100755 index 000000000..b73b3c75d --- /dev/null +++ b/.cursor/hooks/shell-diff-check.mjs @@ -0,0 +1,132 @@ +#!/usr/bin/env node +// Cursor afterShellExecution hook. Reverts file changes that are either +// out-of-task-scope OR touch a hardcoded-protected system file. +// +// Untracked files: +// - in a protected path → DELETED (can't let agent establish persistent state +// via `node -e` / `python -c` bypass of pre-shell) +// - out-of-task-scope, not protected → DELETED (matches default-deny intent) +// - in-scope or exempt → left alone + +import { readFileSync, rmSync, existsSync } from 'node:fs'; +import { execSync } from 'node:child_process'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; +const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; +const { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, checkNodeVersion, +} = await import(scopeUrl); +const { logDenial } = await import(logUrl); +const { buildAfterShellContext } = await import(denialUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(obj) { process.stdout.write(JSON.stringify(obj)); process.exit(0); } +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +function gitPorcelain(root) { + try { + return execSync('git status --porcelain', { + cwd: root, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], + }); + } catch { return null; } +} + +function parsePorcelain(out) { + const results = []; + for (const line of out.split('\n')) { + if (!line) continue; + const status = line.slice(0, 2); + const rest = line.slice(3); + const arrow = rest.indexOf(' -> '); + const path = arrow >= 0 ? rest.slice(arrow + 4) : rest; + results.push({ status, path: path.replace(/^"|"$/g, '') }); + } + return results; +} + +async function main() { + const raw = readStdin(); + let shellPayload = {}; + try { shellPayload = raw ? JSON.parse(raw) : {}; } catch { shellPayload = {}; } + const command = shellPayload.command || shellPayload.shell_command || ''; + const sessionId = shellPayload.session_id || null; + + const root = resolveRepoRoot(); + const { id: taskId } = resolveActiveTaskId(root); + + let task = null; + if (taskId) { try { task = loadTask(root, taskId); } catch { return emit({}); } } + + const porcelain = gitPorcelain(root); + if (porcelain === null) return emit({}); + + const entries = parsePorcelain(porcelain); + const outOfScope = entries.filter(({ path }) => { + if (!path) return false; + const d = checkPath(task, path, root); + return d === 'deny' || d === 'protected'; + }); + if (outOfScope.length === 0) return emit({}); + + const reverted = []; + const deleted = []; + const unreverted = []; + for (const { status, path } of outOfScope) { + if (status.startsWith('??')) { + // Untracked new file in a denied location → delete it. + // This prevents agents from bypassing pre-shell (e.g. via `node -e`) to + // establish persistent state in protected paths. Directories are handled + // by recursive removal. + try { + const abs = resolve(root, path); + if (existsSync(abs)) rmSync(abs, { recursive: true, force: true }); + deleted.push(path); + } catch (e) { + unreverted.push({ status, path, reason: (e?.message || 'unknown').split('\n')[0] }); + } + continue; + } + try { + execSync(`git checkout -- ${JSON.stringify(path)}`, { + cwd: root, stdio: ['ignore', 'pipe', 'pipe'], + }); + reverted.push(path); + } catch (e) { + unreverted.push({ status, path, reason: (e?.message || 'unknown').split('\n')[0] }); + } + } + + for (const p of reverted) { + logDenial(root, { event: 'afterShell.revert', tool: 'Shell', path: p, task: taskId, command, sessionId }); + } + for (const p of deleted) { + logDenial(root, { event: 'afterShell.delete', tool: 'Shell', path: p, task: taskId, command, sessionId }); + } + for (const u of unreverted) { + logDenial(root, { event: 'afterShell.unreverted', tool: 'Shell', path: u.path, task: taskId, command, sessionId }); + } + + const { message } = buildAfterShellContext({ + command, task, taskId, root, + reverted, deleted, unreverted, + }); + emit({ additional_context: message }); +} + +main().catch(err => { + process.stderr.write(`shell-diff-check error: ${err?.message || err}\n`); + emit({}); +}); diff --git a/.cursor/hooks/shell-precheck.mjs b/.cursor/hooks/shell-precheck.mjs new file mode 100755 index 000000000..b63504fdf --- /dev/null +++ b/.cursor/hooks/shell-precheck.mjs @@ -0,0 +1,182 @@ +#!/usr/bin/env node +// Cursor beforeShellExecution hook. Scans shell commands for destructive +// operations targeting out-of-scope paths and blocks them BEFORE they run. +// +// Parsing logic lives in agent-scope/lib/shell-parse.mjs (pure + testable). +// +// Directly-detected destructive verbs: +// rm / unlink / rmdir / mv / cp / chmod / chown / truncate / install / ln / sed -i +// redirections > / >> / &> / tee +// find ... -delete / -exec rm ... +// xargs +// +// Nested shells (bash -c "...", sh -c, zsh -c, dash -c, ksh -c): +// Recursively parse the -c body and apply the same rules. +// +// Opaque evaluators (node -e, python -c, perl -e, ruby -e, php -r, lua -e, +// deno eval): string-scan the body. Deny iff it contains BOTH a write-intent +// hint (writeFileSync, os.remove, open(...,"w"), rm, etc.) AND references a +// protected path literal. This is conservative to avoid false positives; the +// afterShell hook is the backstop for anything that slips through (it +// deletes untracked files in denied paths and reverts tracked edits). + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; +const parseUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/shell-parse.mjs')).href; +const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; +const { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, + normalizeToRepoPath, checkNodeVersion, PROTECTED_PATTERNS, coversProtected, +} = await import(scopeUrl); +const { logDenial } = await import(logUrl); +const { + splitCommands, tokenize, extractRedirections, extractDestructiveTargets, + extractFindTargets, extractXargsTarget, extractNestedShellBody, + extractOpaqueBody, bodyHasWriteIntent, bodyTouchesProtected, +} = await import(parseUrl); +const { buildShellPrecheckDenial } = await import(denialUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(obj) { process.stdout.write(JSON.stringify(obj)); process.exit(0); } +function allow() { emit({}); } +function deny(msg) { + emit({ + permission: 'deny', + agent_message: msg, + user_message: 'agent-scope pre-shell guard blocked a destructive command — see agent_message for the plan-mode menu.', + }); +} + +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +// Scan one sub-command string. Recurses into bash -c "". +function scanSubCommand(sub, { task, root, violations, depth = 0 }) { + if (depth > 4) return; + const tokens = tokenize(sub); + if (!tokens.length) return; + + const nested = extractNestedShellBody(tokens); + if (nested) { + for (const s of splitCommands(nested.body)) { + scanSubCommand(s, { task, root, violations, depth: depth + 1 }); + } + return; + } + + const opaque = extractOpaqueBody(tokens); + if (opaque) { + const { evaluator, body } = opaque; + if (bodyHasWriteIntent(body) && bodyTouchesProtected(body, PROTECTED_PATTERNS)) { + violations.push({ + sub, cmd: `${evaluator} ${opaque.flag}`, + path: '(opaque body writes to protected path)', + decision: 'protected', + }); + } + return; + } + + const direct = extractDestructiveTargets(tokens); + const redirects = extractRedirections(tokens).map(t => ({ kind: 'redirect', path: t })); + const findTargets = extractFindTargets(tokens); + const xargsTarget = extractXargsTarget(tokens); + + const candidates = [ + ...direct.targets.map(t => ({ kind: direct.cmd, path: t })), + ...redirects, + ...(findTargets ? findTargets.targets.map(t => ({ kind: 'find', path: t })) : []), + ]; + + if (xargsTarget && bodyTouchesProtected(sub, PROTECTED_PATTERNS)) { + violations.push({ + sub, cmd: xargsTarget.cmd, + path: '(stdin-driven; command text mentions protected path)', + decision: 'protected', + }); + } + + for (const { kind, path } of candidates) { + if (!path) continue; + if (path.startsWith('/dev/') || path === '/dev/null') continue; + if (path.includes('://')) continue; + const rel = normalizeToRepoPath(root, path); + if (rel.startsWith('../') || rel === '..') continue; + + const decision = checkPath(task, rel, root); + if (decision === 'deny' || decision === 'protected') { + violations.push({ sub, cmd: kind, path: rel, decision }); + continue; + } + // For recursive/tree-destructive ops (rm -rf , find -delete), + // also check whether the target directory CONTAINS any protected path. + const isRecursive = kind === 'find' || (kind === 'rm' && /\brm\b.*\s-\w*r/.test(sub)); + if (isRecursive && coversProtected(rel, root)) { + violations.push({ sub, cmd: kind, path: rel, decision: 'protected (covers)' }); + } + } +} + +async function main() { + if (process.env.AGENT_SCOPE_BOOTSTRAP === '1') return allow(); + + const raw = readStdin(); + let payload = {}; + try { payload = raw ? JSON.parse(raw) : {}; } catch { return allow(); } + + const command = payload.command || payload.shell_command || ''; + const sessionId = payload.session_id || null; + if (!command || typeof command !== 'string') return allow(); + + const root = resolveRepoRoot(); + const { id: taskId } = resolveActiveTaskId(root); + + let task = null; + if (taskId) { + try { task = loadTask(root, taskId); } + catch { return allow(); } + } + + const violations = []; + for (const sub of splitCommands(command)) { + scanSubCommand(sub, { task, root, violations }); + } + + if (violations.length === 0) return allow(); + + for (const v of violations) { + logDenial(root, { + event: 'beforeShell.deny', + tool: 'Shell', + cmd: v.cmd, + path: v.path, + decision: v.decision, + task: taskId, + command, + sessionId, + }); + } + + const { message } = buildShellPrecheckDenial({ + command, violations, task, taskId, root, + }); + deny(message); +} + +main().catch(err => { + process.stderr.write(`shell-precheck error: ${err?.message || err}\n`); + allow(); +}); diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc new file mode 100644 index 000000000..ae24ef558 --- /dev/null +++ b/.cursor/rules/agent-scope.mdc @@ -0,0 +1,241 @@ +--- +description: Task-scoped write permissions. Invisible by default; engaged via `pnpm task start` or an active task. All denials surface a plan-mode AskQuestion menu. +alwaysApply: true +--- + +# agent-scope — task-scoped writes + +The repo ships a task-scoping guard but it stays **invisible** unless the user +engages it. If no active task is set and bootstrap is off, the system is a +no-op for you — behave normally. The hardcoded protected paths (the guard's +own files) are still defended, but that only matters if you try to touch them. + +The user turns the system on in one of two ways: + +1. **CLI** — they run `pnpm task start`. That prints a line they paste into + chat beginning with `agent-scope: start task onboarding`. When you see that + line, follow the **Task onboarding protocol** below. +2. **Existing active task** — the session-start hook injects a context block + naming the active task. From that moment on, writes are task-scoped and + every denial must be surfaced via the plan-mode denial protocol below. + +## Task onboarding protocol — when the user wants to start a new scoped task + +Triggered by any of: + +- The trigger line from `pnpm task start` (paste from the user's terminal). +- The user typing "start a scoped task", "scope me", "agent-scope: start", or + similar intent. + +Follow these steps exactly: + +1. **Ask for the task description.** Send a plain chat message: + > "OK, let's scope a new task. Describe in detail what we're building or + > fixing — the more specific, the better the scope I can propose. Which + > packages, which behaviors, which tests, any files you already know about?" + +2. **Wait for the user's reply.** Do not explore yet. + +3. **Explore the codebase.** Use the DKG SPARQL queries in the top of this + file, plus `Glob`, `Grep`, and `Read`, to find: + - The package(s) the task touches + - The specific source files implementing the mentioned behavior + - Sibling test files that cover them + - Related schemas / contracts / migrations if the task mentions them + +4. **Draft a scope.** Aim for a set of globs that: + - Covers everything you reasonably expect to touch + - Errs slightly broad (under-scoping causes constant denials mid-work; + over-scoping just gives you rope the user can see via `pnpm task show`) + - Includes matching test files + - Always appends `!**/secrets.*`, `!**/.env*` as safety denies + - Inherits `base` unless there is a reason not to (base supplies the + standard exemptions: `**/dist/**`, lockfiles, build artifacts) + +5. **Propose the scope via `AskQuestion`.** The prompt **must** include: + - A one-sentence rephrasing of the user's task description + - The list of allowed globs you're proposing, bullet-pointed + - Your recommendation ("I recommend `approve`") + - The suggested task id (kebab-case, short) + + Options: + - `approve` — "Create the manifest with these globs and activate it." + - `show_globs` — "Let me see the full manifest JSON first." + - `edit` — "I want to tweak the globs before approving." + - `cancel` — "Don't start a task; I'll work unscoped." + - `custom_instruction` — "Let me type my own instruction." + +6. **On `approve`:** print a code block with the **exact** command for the user + to run in their own terminal. The user must run it (not you) because the + `afterShellExecution` hook would delete an untracked file in + `agent-scope/tasks/` if you ran it yourself: + + ```bash + pnpm task create \ + --description "" \ + --allowed "" \ + --allowed "" \ + ... + --inherits base \ + --activate + ``` + + Then say "Let me know when you've run that and I'll start." Wait for the + user to confirm with any short acknowledgement ("done", "go", "ok"). + +7. **On `show_globs`:** print the JSON you're about to propose, then re-ask + `AskQuestion` with the same options. + +8. **On `edit`:** ask the user which globs to change, then loop back to step 5. + +9. **On `cancel`:** acknowledge and continue working without a task (system + stays invisible). + +10. **On `custom_instruction`:** ask the user in plain chat what they want + instead, then do that. + +From step 6 onward, the active task is set and the plan-mode denial protocol +(below) governs every future write. + +## Plan-mode denial protocol — MANDATORY once a task is active + +Every denial message from agent-scope carries a structured menu. You must +**stop**, parse it, and surface it via `AskQuestion`. Do not retry, rewrite, +or work around a denial — the defense-in-depth layers will revert or delete +anything that slips through anyway. + +### Detecting a denial + +- `preToolUse` returned `{ permission: "deny" }` where `agent_message` starts + with `OUT OF TASK SCOPE` or `PROTECTED PATH`. +- `beforeShellExecution` returned `{ permission: "deny" }` whose + `agent_message` starts with `Destructive shell command blocked`. +- `afterShellExecution` returned an `additional_context` starting with + `agent-scope: shell command modified`. + +### Parsing the menu + +Every such message contains a fenced JSON block: + +``` + +{ JSON payload here } + +``` + +The JSON has this shape (TypeScript for clarity): + +```ts +{ + version: 1, + hook: "preToolUse" | "beforeShellExecution" | "afterShellExecution", + reason: "out-of-scope" | "protected" | "manifest-load-error" | "unknown", + deniedPath?: string, // preToolUse + command?: string, // shell hooks + activeTask: string | null, + suggestedGlob?: string, + alternativeTasks?: { id: string, description: string }[], + violations?: { cmd, path, decision }[], // shell-precheck + reverted?: string[], // after-shell + deleted?: string[], // after-shell + options: { + id: string, + label: string, + action: { + kind: "add_to_manifest" | "switch_task" | "skip" | "cancel" + | "bootstrap" | "fix_manifest" | "clear_task" | "custom", + task?: string, patterns?: string[], instruction?: string, error?: string, + } + }[], + recommendedOptionId: string, // suggested default (out-of-scope → add_glob, protected → cancel, load-error → fix_manifest) + agentReasoning: null, // placeholder — you fill this in via the AskQuestion prompt, see below +} +``` + +### What you must do when you see a denial + +1. **Stop.** Do not retry the operation with another tool or command form. +2. **Extract the JSON** from between `agent-scope-menu:begin` and `:end`. +3. **Call `AskQuestion`** with a single question whose prompt **must include**: + - The denied path (or command). + - **Your reasoning** in 1–2 sentences: why you wanted to touch this file, + what you were trying to accomplish. This is the plan-mode "here's what I + was thinking" that the user needs to make an informed decision. + - Your **recommendation**: which option you'd pick if you were the user, + and why. Lead with the JSON's `recommendedOptionId` unless you have a + concrete reason to override it. + - The options array, verbatim from the JSON — use each entry's `id` as the + option id and `label` as the display label. +4. **Act on the user's choice** using the matching `action` object: + - `add_to_manifest` → edit `agent-scope/tasks/.json`, append the + listed patterns to `allowed`, save, then retry the original operation. + - `switch_task` → run `pnpm task set `, then retry. + - `bootstrap` → print `action.instruction` verbatim. Wait for the user to + enable bootstrap. After the protected work is done, remind them to + `rm agent-scope/.bootstrap-token` to re-lock. + - `fix_manifest` → open the referenced manifest, fix the reported error, + re-run `pnpm task validate `. + - `clear_task` → run `pnpm task clear`. + - `skip` → acknowledge; move on to other in-scope work. + - `cancel` → stop the turn; summarise what got done and what didn't. + - `custom` → ask the user in plain chat: "OK, what should I do instead?" + Wait for their free-text reply, then carry out whatever they say. +5. **Never invent options.** If none of the presets match and `custom` isn't + present (shouldn't happen — it's in every menu), pick `cancel`. + +## What counts as a write + +The guard blocks `Write`, `StrReplace`, `Delete`, `EditNotebook`, `MultiEdit`, +and `Edit`. Shell commands are inspected **before** they run — destructive +verbs (`rm`, `mv`, `cp`, `chmod`, `chown`, `truncate`, `ln -sf`, `sed -i`, +`>`, `>>`, `tee`, `find -delete`, `find -exec rm`, `xargs rm`), nested shells +(`bash -c "..."`), and opaque evaluators (`node -e`, `python -c`, `perl -e`) +that touch protected or out-of-scope paths are denied. Anything that still +slips through is reverted after the command runs, and untracked files in +denied paths are deleted. + +## Hardcoded protected paths + +Always denied regardless of task, unless a human has enabled bootstrap +(`touch agent-scope/.bootstrap-token` or `AGENT_SCOPE_BOOTSTRAP=1`): + +- `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` +- `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/hooks/**` +- `agent-scope/schema/**`, `agent-scope/tasks/**`, `agent-scope/active`, + `agent-scope/.bootstrap-token` +- `.git/hooks/**`, `.github/workflows/agent-scope.yml` + +If one of these needs to change, use the `bootstrap` option from the denial +menu — do not try to bypass (no shell redirection, no `node -e`, no alternate +tooling). The `afterShell` hook will delete any untracked files in these paths +even if the bypass succeeded, so retry attempts are wasted. + +## Manifest cheat sheet + +```json +{ + "id": "example", + "description": "...", + "inherits": ["base"], + "allowed": [ + "packages/foo/**/*.ts", + "!packages/foo/**/secrets.*" + ], + "exemptions": ["**/dist/**", "pnpm-lock.yaml"] +} +``` + +- Patterns support `*`, `**`, `?`. +- Prefix with `!` for an explicit deny that overrides everything else. +- `inherits` pulls `allowed` + `exemptions` from a parent manifest (e.g. `base`). +- Default-deny: anything not matched is blocked. + +## Don't + +- Don't edit `agent-scope/active` yourself — ask the user to switch tasks via + the menu's `switch_task` action, or via `pnpm task set`. +- Don't add patterns to a manifest to unblock yourself — always confirm via + the plan-mode menu first. +- Don't run `pnpm task create` yourself for a new task — the user runs it, so + the manifest is human-authored and the afterShell backstop doesn't wipe it. +- Don't retry a blocked operation with a different tool or command form. diff --git a/.gitignore b/.gitignore index e64f6cf7d..9770c9cae 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,9 @@ packages/evm-module/deployments/hardhat_contracts.json packages/evm-module/deployments/localhost_contracts.json snapshots/_cache_phase1_neuroweb_epoch16.json .claude/ + +# agent-scope: never commit the bootstrap override, local audit logs, or +# the per-developer active-task pointer +agent-scope/.bootstrap-token +agent-scope/logs/ +agent-scope/active diff --git a/CLAUDE.md b/CLAUDE.md index d98bd010a..bd9a8cbec 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -181,3 +181,117 @@ All classes and properties use the `devgraph:` namespace (`https://ontology.dkg. | `Contract` | A Solidity smart contract | The full ontology is at `packages/mcp-server/schema/dev-paranet.ttl`. + +--- + +## Task-scoped writes (`agent-scope`) — MANDATORY behaviour + +This repo ships an opt-in task-scoping guard. It stays **invisible** unless +engaged. Default behaviour: write normally, read anything. The guard only +kicks in when: + +1. The user pastes a line starting with `agent-scope: start task onboarding` + (output of `pnpm task start`), OR +2. An active task is set (session-start hook injects a context block naming + it), OR +3. You try to touch one of the hardcoded protected paths that defend the + guard itself — those are always blocked unless the user has enabled + bootstrap mode. + +### Task onboarding protocol (triggered by `pnpm task start` / "start a scoped task") + +1. Acknowledge in plain chat and ask the user to describe the task in detail + (which packages, which behaviours, which tests, any specific files). +2. Wait for the description. +3. Explore the codebase with DKG queries + `Glob` + `Grep` + `Read` to find + the files the task will touch. +4. Draft a set of globs that covers those files plus their tests. Err + slightly broad; inherit `base`; always append `!**/secrets.*`, + `!**/.env*`. +5. Propose the scope via `AskQuestion`. The prompt must include a one-line + rephrase of the task, the list of proposed globs, and your recommendation. + Options: `approve`, `show_globs`, `edit`, `cancel`, `custom_instruction`. +6. On `approve`, print a fenced bash block with the **exact** command for the + user to run in their terminal (not you — the `afterShellExecution` hook + would delete a new manifest file you created yourself): + + ```bash + pnpm task create \ + --description "..." \ + --allowed "" \ + --allowed "" \ + --inherits base \ + --activate + ``` + + Wait for them to confirm ("done"/"go"), then start the actual work. +7. On `show_globs`, print the manifest JSON, then re-ask with the same options. +8. On `edit`, ask which globs to change, loop back to step 5. +9. On `cancel`, acknowledge and keep working without a task. +10. On `custom_instruction`, ask in plain chat what they want instead. + +### Plan-mode denial protocol (runs for every agent-scope denial) + +When any of these happen, stop and surface a menu. Do NOT retry, rewrite, or +work around the denial — the defense-in-depth layers revert tracked changes +and delete untracked files in denied paths anyway: + +- `preToolUse` returned `{ permission: "deny" }` with `OUT OF TASK SCOPE` or + `PROTECTED PATH` in the message. +- `beforeShellExecution` returned `{ permission: "deny" }` with + `Destructive shell command blocked` in the message. +- `afterShellExecution` returned `additional_context` starting with + `agent-scope: shell command modified`. + +Every such message contains a fenced JSON block: + +``` + +{ ... JSON payload ... } + +``` + +The JSON has `options[]` and `recommendedOptionId`. It also has a placeholder +`agentReasoning: null` — you fill this in by including your reasoning in the +AskQuestion prompt (see below). + +**Protocol:** + +1. **Stop.** Do not retry via another tool or command form. +2. **Extract the JSON.** Parse between the fences. +3. **Call `AskQuestion`** with ONE question whose prompt **must include**: + - The denied path / command. + - **Your reasoning in 1–2 sentences** — why you wanted to touch this file, + what you were trying to accomplish. This is the "here's what I was + thinking" that the user needs to make an informed decision. + - **Your recommendation** — lead with the JSON's `recommendedOptionId` + unless you have a concrete reason to override it. + - The full `options` array, verbatim — use each entry's `id`/`label`. +4. **Act on the user's choice** by matching the `action.kind`: + - `add_to_manifest` → edit `agent-scope/tasks/.json`, append patterns + to `allowed`, retry. + - `switch_task` → `pnpm task set `, retry. + - `bootstrap` → print `action.instruction` verbatim, wait for the user. + Remind them to `rm agent-scope/.bootstrap-token` when done. + - `fix_manifest` → open the manifest, fix the error, validate. + - `clear_task` → `pnpm task clear`. + - `skip` → acknowledge, move on. + - `cancel` → stop the turn, summarise. + - `custom` → ask the user in plain chat "what should I do instead?", do + what they say. +5. **Never invent options.** If nothing fits and no `custom` is listed (it + always is), pick `cancel`. + +### CLI quick reference + +``` +pnpm task start # begin guided onboarding (prints chat trigger) +pnpm task create [flags] # non-interactive manifest build — USER runs this +pnpm task list | show | set | clear | check | audit | resolve +pnpm scope:status | scope:validate | scope:test +``` + +Manifest format is in `agent-scope/README.md`. Never edit a protected path +(`.cursor/hooks/**`, `agent-scope/lib/**`, etc.) without user-granted +bootstrap. Never improvise around a denial. + diff --git a/agent-scope/README.md b/agent-scope/README.md new file mode 100644 index 000000000..57ae93fac --- /dev/null +++ b/agent-scope/README.md @@ -0,0 +1,366 @@ +# agent-scope + +Task-scoped write permissions for AI coding agents. + +An agent can **read** the whole repo, but can only **write** files that are +listed in the active task's manifest. Attempts to write out-of-scope files are +blocked by Cursor hooks, git pre-commit, and CI — and must be explicitly +approved by a human (by editing the manifest). + +## Opt-in by default + +agent-scope stays **invisible** until you engage it. With no active task and +no bootstrap, Cursor sessions behave as if the system didn't exist: the +session-start hook emits nothing, and the write/shell hooks only fire on the +hardcoded protected paths (the guard's own files). You can do ad-hoc work +without any task ceremony. + +You engage the system in one of three ways: + +1. **Guided onboarding** — run `pnpm task start`. The CLI prints a line you + paste into your Cursor chat. The agent then asks you to describe the task, + explores the codebase, proposes a scope via AskQuestion, and prints the + exact `pnpm task create` command for you to run. +2. **Explicit** — `pnpm task set ` activates a manifest you + already have. +3. **Direct** — `pnpm task create --description "..." --allowed "..." --activate` + builds + activates a manifest in one shot. + +Clearing the active task (`pnpm task clear`) returns Cursor to its invisible +default. + +## Architecture (defense in depth) + +``` +Agent → Cursor sessionStart hook → injects active-task context (silent when idle) +Agent → Cursor preToolUse hook → blocks out-of-scope Write/Edit/Delete +Agent → Cursor beforeShellExecution → blocks destructive shell cmds on denied paths +Agent → Cursor afterShellExecution → reverts out-of-scope shell writes +System → hardcoded protected paths → always blocks writes to agent-scope itself +Dev → git pre-commit hook → blocks local commits of out-of-scope files +CI → GitHub Actions → blocks PRs with out-of-scope diffs +Ops → optional webhook sink → forwards denials to DKG/Slack/etc. +``` + +All layers use the same library (`agent-scope/lib/scope.mjs`) and the same +manifests (`agent-scope/tasks/*.json`). No layer is optional — bypassing one +(e.g. Cursor's hook) still leaves the commit, PR, and review layers. + +## Concepts + +| Concept | File | Description | +|---|---|---| +| **Task manifest** | `agent-scope/tasks/.json` | Declares what files a task is allowed to modify | +| **JSON schema** | `agent-scope/schema/task.schema.json` | Validates manifest structure | +| **Active task** | `agent-scope/active` | One-line file with the active task id (gitignored, per-developer) | +| **Env override** | `AGENT_SCOPE_TASK` | If set, takes precedence over the file | +| **Branch convention** | `task//*` or `agent-scope//*` | Branch name auto-detects the task if the env/file is unset | +| **Git config fallback** | `git config agent-scope.task ` | Lowest-priority fallback | +| **Denial log** | `agent-scope/logs/denials.jsonl` | Append-only audit trail (gitignored, rotates at 5MB) | +| **Bootstrap token** | `agent-scope/.bootstrap-token` | If present, disables hardcoded path protection (for maintainers editing agent-scope itself) | +| **Bootstrap env** | `AGENT_SCOPE_BOOTSTRAP=1` | Same as above but per-process | +| **Webhook sink** | `AGENT_SCOPE_WEBHOOK=` | POSTs each denial to the URL (opt-in) | + +## One-time setup + +```bash +# Install the git pre-commit hook (per developer) +pnpm scope:install-hooks + +# Verify everything is in order +pnpm scope:test # runs the scope library unit tests +pnpm scope:validate # validates every manifest +``` + +## Quick start + +```bash +# Guided onboarding — prints a chat trigger for the Cursor agent +pnpm task start + +# Non-interactive manifest creation (run this yourself; see "Onboarding flow") +pnpm task create my-task \ + --description "Refactor peer sync for workspace auth" \ + --allowed "packages/agent/src/**sync*" \ + --allowed "packages/agent/test/**sync*" \ + --inherits base \ + --activate + +# List available tasks (marks the active one with *) +pnpm task list + +# Set the active task +pnpm task set sync-refactor + +# See which task is active and what it allows +pnpm task show + +# Debug how the active task was resolved (env/file/branch/git-config) +pnpm task resolve + +# Create a new task manifest interactively (prompts you) +pnpm task init my-task + +# Check whether a specific path is in scope +pnpm task check packages/agent/src/sync-handler.ts +# → allow + +# Recent denials (audit) +pnpm task audit + +# Clear the active task (writes unrestricted again) +pnpm task clear +``` + +## Onboarding flow + +The `pnpm task start` command is the paved path. It prints a trigger line +like: + +``` +agent-scope: start task onboarding. Please follow the Task onboarding +protocol in CLAUDE.md: ask me to describe the task, explore the codebase, +propose a scope via AskQuestion, and print the `pnpm task create` command +for me to run once I approve. +``` + +When you paste that into your Cursor chat, the agent follows a fixed protocol +(defined in `.cursor/rules/agent-scope.mdc` and `CLAUDE.md`): + +1. Asks you to describe what you're building or fixing. +2. Explores the codebase (Grep / Glob / DKG) to find relevant files. +3. Proposes a set of globs via `AskQuestion` in plan-mode style — approve, + show JSON, edit, cancel, or type a custom instruction. +4. On approve, prints the exact `pnpm task create` command for you to run. +5. You run it in your terminal (not the agent — otherwise the + `afterShellExecution` hook would delete the new manifest as an untracked + file in a protected path). +6. The agent starts the real work. + +From here, every attempted write to an out-of-scope file triggers a plan-mode +AskQuestion menu — see **Escalation** below. + +## Manifest format + +```json +{ + "id": "sync-refactor", + "description": "Refactor peer sync protocol to add workspace sync auth", + "owner": "bojan", + "allowed": [ + "packages/agent/src/**sync*", + "packages/agent/src/discovery.ts", + "packages/core/src/**sync*", + "packages/publisher/src/**sync*", + "packages/*/test/**sync*", + "!**/secrets.*" + ], + "exemptions": [ + "**/dist/**", + "**/*.tsbuildinfo", + "pnpm-lock.yaml" + ] +} +``` + +- `allowed` — glob patterns that the agent may write to. Supports `*`, `**`, `?`. +- `exemptions` — patterns that are always allowed (build artifacts, lockfiles). +- `!pattern` — explicit deny, overrides everything else in both lists. +- **Default-deny**: anything not matched is blocked. + +Run `pnpm scope:validate` to verify all manifests conform to +`agent-scope/schema/task.schema.json`. + +## How enforcement works + +Six layers: + +1. **Cursor `sessionStart` hook** (`.cursor/hooks/session-start.mjs`) injects + the active task's allowed patterns into the agent's context so it knows + what it may modify from the first turn. **When no task is active and + bootstrap is off, the hook emits nothing** — the agent's initial context + is untouched. Only when a task is active (or bootstrap is on) does it + surface a context block. +2. **Cursor `preToolUse` hook** (`.cursor/hooks/scope-guard.mjs`) runs before + every `Write`, `StrReplace`, `Delete`, `EditNotebook`, `MultiEdit`, and + `Edit`. Runs the protected-path check first, then the task-scope check. +3. **Cursor `beforeShellExecution` hook** (`.cursor/hooks/shell-precheck.mjs`) + tokenises the pending shell command and blocks destructive verbs + (`rm`, `mv`, `cp`, `chmod`, `chown`, `truncate`, `ln -sf`, `sed -i`, + redirections `>` / `>>` / `tee`, `find -delete`, `xargs rm`) when their + target is out-of-scope or hardcode-protected. Recurses into `bash -c`, + `sh -c`, and opaque evaluators (`node -e`, `python -c`, `perl -e`) to + catch bypass attempts that hide destructive operations inside string + arguments. Parsing logic lives in `agent-scope/lib/shell-parse.mjs` and + is fully unit-tested. +4. **Cursor `afterShellExecution` hook** (`.cursor/hooks/shell-diff-check.mjs`) + is the backstop for anything the pre-check misses: it runs + `git status --porcelain` and `git checkout --` reverts any tracked + out-of-scope/protected modifications. Untracked files in denied paths + are **deleted** (so an agent cannot establish persistent state like a + new hook file via a pre-shell bypass). +5. **Git pre-commit hook** (`agent-scope/hooks/pre-commit`, installed via + `pnpm scope:install-hooks`) blocks local commits of out-of-scope files. + Also hard-refuses to commit `agent-scope/.bootstrap-token`. +6. **GitHub Actions** (`.github/workflows/agent-scope.yml`) runs on every + PR: validates all manifests, runs the unit tests, blocks the bootstrap + token from being committed, resolves the task id from the PR body or + branch name, and fails the check (with a PR comment) if any changed file + is out of scope. + +If no active task is set (no env, no file, no matching branch, no git-config) +**and** bootstrap is off, layer 1 is silent and layers 2–4 only trigger on +the hardcoded protected paths. Layers 5–6 likewise only gate protected-path +commits/PRs. Everything else is a no-op — you can do ad-hoc work without +changing the workflow. + +## Hardcoded protected paths + +Some files define the enforcement system itself. If the agent were free to +edit them, the whole thing would be worthless. These paths are **always +denied** regardless of active task, unless bootstrap mode is active: + +- `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` +- `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/hooks/**` +- `agent-scope/schema/**`, `agent-scope/tasks/**`, `agent-scope/active` +- `agent-scope/.bootstrap-token` +- `.git/hooks/**`, `.github/workflows/agent-scope.yml` + +### Bootstrap mode + +To legitimately improve `agent-scope` itself, a human enables bootstrap mode. +Two equivalent switches: + +```bash +# Option A — file token (persists across sessions until deleted) +touch agent-scope/.bootstrap-token + +# Option B — env var (just for the current Cursor process) +export AGENT_SCOPE_BOOTSTRAP=1 +``` + +While bootstrap is active, the sessionStart hook prints a loud warning into +the agent context. When you're done, remove it: + +```bash +rm agent-scope/.bootstrap-token +``` + +The bootstrap token is in `.gitignore` and the pre-commit hook hard-refuses +to commit it, so it cannot leak into git history. + +## Manifest inheritance + +Manifests can share common exemptions (e.g. `**/dist/**`, `pnpm-lock.yaml`) +via an `inherits` field. The `base` task ships as a pure-exemption parent: + +```json +{ "id": "child", "inherits": ["base"], "allowed": ["src/**"] } +``` + +Inheritance merges parents first (deduplicating), then the child's own +`allowed`/`exemptions` are appended. `!pattern` denials in a child override +parent `allowed` patterns. Cycles are detected and rejected. + +## Optional webhook sink + +Forward denials to a DKG node / Slack / log aggregator by setting +`AGENT_SCOPE_WEBHOOK` to an http(s) URL. Each denial is POSTed as JSON +(fire-and-forget, 1.5s timeout). Activity is also written to +`agent-scope/logs/denials.jsonl` locally with automatic rotation at 5MB. + +## Escalation — plan-mode denial menu + +Every denial (preToolUse, beforeShellExecution, afterShellExecution) emits both +a human-readable prose block **and** a machine-readable JSON menu embedded in +the hook's response. Agents following `.cursor/rules/agent-scope.mdc` (and +`CLAUDE.md`) must parse the menu and surface it to the user via the same +`AskQuestion` mechanism Cursor uses for plan mode. + +The structured block is fenced by HTML comments so it's trivial to locate: + +``` + +{ + "version": 1, + "hook": "preToolUse", + "reason": "out-of-scope", + "deniedPath": "packages/evm-module/contracts/Staking.sol", + "activeTask": "sync-refactor", + "suggestedGlob": "packages/evm-module/contracts/**", + "alternativeTasks": [ { "id": "staking", "description": "..." } ], + "options": [ + { "id": "add_file", "label": "...", "action": { "kind": "add_to_manifest", ... } }, + { "id": "add_glob", "label": "...", "action": { "kind": "add_to_manifest", ... } }, + { "id": "switch_task_staking","label": "...", "action": { "kind": "switch_task", "task": "staking" } }, + { "id": "skip", "label": "...", "action": { "kind": "skip" } }, + { "id": "cancel", "label": "...", "action": { "kind": "cancel" } }, + { "id": "custom_instruction", "label": "Let me type my own instruction", "action": { "kind": "custom" } } + ], + "recommendedOptionId": "add_glob", + "agentReasoning": null +} + +``` + +Possible `action.kind` values: + +| kind | what the agent should do | +|---|---| +| `add_to_manifest` | Add `action.patterns` to `agent-scope/tasks/.json` under `allowed`, then retry the original operation. | +| `switch_task` | Run `pnpm task set `, then retry. | +| `bootstrap` | Print `action.instruction` to the user and wait for confirmation. Remind them to remove the token after. | +| `fix_manifest` | Open `agent-scope/tasks/.json`, fix the error (`action.error`), re-run `pnpm task validate`. | +| `clear_task` | Run `pnpm task clear`. | +| `skip` | Acknowledge and move on. | +| `cancel` | Stop the turn; summarise for the user. | +| `custom` | Ask the user in plain chat what they want instead, then do it. | + +Extra guidance in the block: + +- `recommendedOptionId` is a hint for which option to highlight. It's chosen + conservatively (`add_glob` for out-of-scope, `cancel` for protected, + `fix_manifest` for manifest-load errors). The agent is told to lead with it + unless overriding has a concrete reason. +- `agentReasoning: null` is a placeholder. The hook can't know the agent's + reasoning, so the agent **fills it in when surfacing the menu via + `AskQuestion`**: the prompt must include a 1–2 sentence "here's what I was + trying to do and why this file came up". Plan-mode equivalent. + +Heuristics (in `agent-scope/lib/denial.mjs`): + +- `suggestedGlob` is derived from the denied path's parent directory + (`dirname/**`). +- `alternativeTasks` lists up to 3 other manifests that already cover the + denied path. +- `protected` reasons offer only `bootstrap` / `skip` / `cancel` / + `custom_instruction` — no other option can legitimately unblock the write. + +Builders and tests live alongside the scope library: + +``` +agent-scope/lib/denial.mjs # the builders +agent-scope/lib/denial.test.mjs # 33 unit tests +``` + +No special tokens or APIs — the manifest is the source of truth; edit it to +grant permission. Changes to a manifest still go through normal review. + +## Emergency override + +For genuine emergencies, the git pre-commit hook can be skipped with: + +```bash +AGENT_SCOPE_SKIP=1 git commit ... +``` + +The CI check cannot be bypassed from the dev machine — it runs on GitHub. + +## Debug / audit + +```bash +pnpm task resolve # how was the active task resolved? +pnpm task audit # recent denials +pnpm task validate # check all manifests +tail -f agent-scope/logs/denials.jsonl +``` diff --git a/agent-scope/bin/install-git-hooks.sh b/agent-scope/bin/install-git-hooks.sh new file mode 100755 index 000000000..122cf3d37 --- /dev/null +++ b/agent-scope/bin/install-git-hooks.sh @@ -0,0 +1,53 @@ +#!/usr/bin/env bash +# Install agent-scope git hooks into .git/hooks/. +# +# Idempotent: if a hook with a different name already exists, we preserve it +# and only chain the agent-scope checks on top. + +set -euo pipefail + +repo_root=$(git rev-parse --show-toplevel) +cd "$repo_root" + +hooks_src="$repo_root/agent-scope/hooks" +hooks_dst="$repo_root/.git/hooks" + +if [[ ! -d "$hooks_dst" ]]; then + echo "error: $hooks_dst not found (is this a git repo?)" >&2 + exit 1 +fi + +install_hook() { + local name="$1" + local src="$hooks_src/$name" + local dst="$hooks_dst/$name" + + if [[ ! -f "$src" ]]; then + echo "skip: $name (no source)" >&2 + return + fi + + if [[ -f "$dst" ]] && ! grep -q 'agent-scope' "$dst" 2>/dev/null; then + # Existing non-agent-scope hook — back it up and chain. + cp "$dst" "$dst.pre-agent-scope.bak" + cat > "$dst" <] [--stdin] [ ...]'); + process.exit(0); + } + paths.push(a); +} + +if (stdinMode) { + const data = readFileSync(0, 'utf8'); + for (const line of data.split(/\r?\n/)) { + const p = line.trim(); + if (p) paths.push(p); + } +} + +if (paths.length === 0) { + console.error('scope-check: no paths provided'); + process.exit(2); +} + +const root = resolveRepoRoot(); +const taskId = taskOverride || getActiveTaskId(root); +const task = taskId ? loadTask(root, taskId) : null; + +let anyBad = false; +for (const p of paths) { + const rel = normalizeToRepoPath(root, p); + const decision = checkPath(task, rel, root); + console.log(`${decision.padEnd(9)} ${rel}`); + if (decision === 'deny' || decision === 'protected') anyBad = true; +} + +if (!task) { + console.error('(no active task — only protected paths enforced)'); +} + +process.exit(anyBad ? 1 : 0); diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs new file mode 100755 index 000000000..909471fcc --- /dev/null +++ b/agent-scope/bin/task.mjs @@ -0,0 +1,426 @@ +#!/usr/bin/env node +// Active-task management CLI. + +import { + readFileSync, writeFileSync, unlinkSync, existsSync, +} from 'node:fs'; +import { resolve } from 'node:path'; +import { createInterface } from 'node:readline'; +import { stdin as input, stdout as output } from 'node:process'; +import { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, + normalizeToRepoPath, listTasks, validateManifest, checkNodeVersion, + isBootstrapActive, +} from '../lib/scope.mjs'; + +try { checkNodeVersion(); } +catch (e) { console.error(e.message); process.exit(3); } + +const root = resolveRepoRoot(); +const tasksDir = resolve(root, 'agent-scope/tasks'); +const activeFile = resolve(root, 'agent-scope/active'); +const logsFile = resolve(root, 'agent-scope/logs/denials.jsonl'); +const bootstrapToken = resolve(root, 'agent-scope/.bootstrap-token'); + +function bail(msg, code = 1) { console.error(`error: ${msg}`); process.exit(code); } + +function bootstrapWarning() { + if (isBootstrapActive(root)) { + console.log(''); + console.log('!! BOOTSTRAP MODE ACTIVE — hardcoded path protection is DISABLED.'); + console.log(`!! Remove when done: rm ${bootstrapToken}`); + console.log(''); + } +} + +// --------------------------------------------------------------------------- + +function list() { + const ids = listTasks(root); + if (!ids.length) { console.log('(no task manifests found)'); return; } + const { id: activeId } = resolveActiveTaskId(root); + for (const id of ids) { + let desc = ''; + try { desc = loadTask(root, id).description || ''; } + catch { desc = '(invalid manifest — run: task validate ' + id + ')'; } + const marker = id === activeId ? '* ' : ' '; + console.log(`${marker}${id.padEnd(28)} ${desc}`); + } + bootstrapWarning(); +} + +function show() { + const { id, source } = resolveActiveTaskId(root); + if (!id) { + console.log('No active task. Writes are unrestricted (except for protected paths).'); + console.log(`Set one with: node agent-scope/bin/task.mjs set `); + bootstrapWarning(); + return; + } + const task = loadTask(root, id); + console.log(`Active task: ${task.id}`); + console.log(`Description: ${task.description || '(none)'}`); + console.log(`Owner: ${task.owner || '(unassigned)'}`); + console.log(`Resolved via: ${source}`); + console.log(`Manifest: ${task.__path}`); + if (task.__inheritedFrom && task.__inheritedFrom.length) { + console.log(`Inherits: ${task.__inheritedFrom.join(', ')}`); + } + console.log(''); + console.log('Allowed patterns:'); + for (const p of task.allowed || []) console.log(` ${p.startsWith('!') ? '[deny] ' : ' '}${p}`); + if (task.exemptions && task.exemptions.length) { + console.log('Exemptions:'); + for (const p of task.exemptions) console.log(` ${p.startsWith('!') ? '[deny] ' : ' '}${p}`); + } + if (task.notes) { + console.log(''); + console.log('Notes:'); + console.log(` ${task.notes.replace(/\n/g, '\n ')}`); + } + bootstrapWarning(); +} + +function set(id) { + if (!id) bail('usage: task set '); + loadTask(root, id); + writeFileSync(activeFile, `${id}\n`, 'utf8'); + console.log(`Active task set: ${id}`); +} + +function clear() { + if (existsSync(activeFile)) unlinkSync(activeFile); + console.log('Active task cleared. Writes are unrestricted (except for protected paths).'); +} + +function check(p) { + if (!p) bail('usage: task check '); + const { id } = resolveActiveTaskId(root); + const task = id ? loadTask(root, id) : null; + const rel = normalizeToRepoPath(root, p); + const decision = checkPath(task, rel, root); + console.log(`${decision.padEnd(9)} ${rel}${id ? ` [task: ${id}]` : ''}`); + if (decision === 'deny' || decision === 'protected') process.exit(1); +} + +async function init(id) { + if (!id) bail('usage: task init '); + if (!/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(id)) { + bail(`invalid id: ${id} (must match /^[a-z0-9][a-z0-9-_.]{0,63}$/)`); + } + const manifestPath = resolve(tasksDir, `${id}.json`); + if (existsSync(manifestPath)) bail(`manifest already exists: ${manifestPath}`); + + const rl = createInterface({ input, output, terminal: false }); + const buffered = []; + const waiters = []; + let closed = false; + rl.on('line', line => { + if (waiters.length) waiters.shift()(line); + else buffered.push(line); + }); + rl.on('close', () => { + closed = true; + while (waiters.length) waiters.shift()(''); + }); + const ask = (q) => new Promise(resolve => { + output.write(q); + if (buffered.length) return resolve(buffered.shift()); + if (closed) return resolve(''); + waiters.push(resolve); + }); + + try { + const description = (await ask('Description (one line): ')).trim(); + const owner = (await ask('Owner (free-form, blank ok): ')).trim(); + const inheritsStr = (await ask('Inherit from (comma-separated task ids, blank for [base]): ')).trim(); + const inherits = inheritsStr + ? inheritsStr.split(',').map(s => s.trim()).filter(Boolean) + : (listTasks(root).includes('base') ? ['base'] : []); + output.write('\n'); + output.write('Enter allowed glob patterns, one per line. Blank line to finish.\n'); + output.write('Tip: prefix a pattern with ! to explicitly deny (e.g. !**/secrets.*)\n'); + const allowed = []; + for (;;) { + const line = (await ask('allowed> ')).trim(); + if (!line) break; + allowed.push(line); + } + if (allowed.length === 0 && inherits.length === 0) { + rl.close(); + bail('at least one allowed pattern is required (unless you inherit from another task)'); + } + + output.write('\n'); + output.write('Enter additional exemption patterns. Blank to finish.\n'); + const exemptions = []; + for (;;) { + const line = (await ask('exempt > ')).trim(); + if (!line) break; + exemptions.push(line); + } + const notes = (await ask('Notes (blank ok): ')).trim(); + + const manifest = { + id, + description: description || undefined, + owner: owner || undefined, + created: new Date().toISOString(), + inherits: inherits.length ? inherits : undefined, + allowed: allowed.length ? allowed : undefined, + exemptions: exemptions.length ? exemptions : undefined, + notes: notes || undefined, + }; + const cleaned = Object.fromEntries(Object.entries(manifest).filter(([,v]) => v !== undefined)); + const errs = validateManifest(cleaned, id); + if (errs.length) { rl.close(); bail(`invalid manifest:\n - ${errs.join('\n - ')}`); } + + writeFileSync(manifestPath, JSON.stringify(cleaned, null, 2) + '\n', 'utf8'); + console.log(''); + console.log(`Created ${manifestPath}`); + console.log(`Activate with: node agent-scope/bin/task.mjs set ${id}`); + } finally { rl.close(); } +} + +// --------------------------------------------------------------------------- +// Task onboarding — `task start` prints a trigger the user pastes to chat; +// `task create` is the non-interactive manifest builder the onboarding flow +// ultimately runs. Both are designed so *the human* creates the manifest — +// an agent-invoked shell command that writes to agent-scope/tasks/ would be +// wiped by the afterShellExecution backstop. +// --------------------------------------------------------------------------- + +const ONBOARD_TRIGGER = + 'agent-scope: start task onboarding. Please follow the Task onboarding ' + + 'protocol in CLAUDE.md: ask me to describe the task, explore the codebase, ' + + 'propose a scope via AskQuestion, and print the `pnpm task create` command ' + + 'for me to run once I approve.'; + +function start() { + const { id: activeId } = resolveActiveTaskId(root); + if (activeId) { + console.log(`A task is already active: ${activeId}`); + console.log(`Run \`pnpm task clear\` first if you want to start a new one.`); + console.log(`Run \`pnpm task show\` to see its scope.`); + bootstrapWarning(); + return; + } + console.log('agent-scope task onboarding'); + console.log(''); + console.log('Paste this line into your Cursor chat to begin:'); + console.log(''); + console.log(' ' + ONBOARD_TRIGGER); + console.log(''); + console.log('The agent will:'); + console.log(' 1. ask you to describe the task in detail'); + console.log(' 2. explore the codebase for relevant files'); + console.log(' 3. propose a scope via AskQuestion (plan-mode style)'); + console.log(' 4. on approval, print the exact `pnpm task create` command'); + console.log(' for you to run here so the manifest is human-authored'); + console.log(''); + console.log('If you already know the scope, skip the dance:'); + console.log(' pnpm task create --description "..." \\'); + console.log(' --allowed "packages/foo/**" --allowed "packages/bar/baz.ts" \\'); + console.log(' --inherits base --activate'); + bootstrapWarning(); +} + +function parseCreateArgs(argv) { + const out = { + id: null, + description: null, + owner: null, + notes: null, + inherits: null, + allowed: [], + exemptions: [], + activate: false, + force: false, + }; + for (let i = 0; i < argv.length; i++) { + const a = argv[i]; + const next = () => { + const v = argv[++i]; + if (v === undefined) bail(`missing value for ${a}`); + return v; + }; + switch (a) { + case '--description': case '-d': out.description = next(); break; + case '--owner': out.owner = next(); break; + case '--notes': out.notes = next(); break; + case '--inherits': out.inherits = next().split(',').map(s => s.trim()).filter(Boolean); break; + case '--allowed': case '-a': out.allowed.push(next()); break; + case '--exemption': case '-e': out.exemptions.push(next()); break; + case '--activate': out.activate = true; break; + case '--force': out.force = true; break; + case '-h': case '--help': + console.log([ + 'usage: task create [flags]', + '', + ' --description, -d short description', + ' --owner free-form owner tag', + ' --notes multi-line notes (use \\n)', + ' --inherits comma-separated parent task ids', + ' --allowed, -a add an allowed pattern (repeatable)', + ' --exemption, -e add an exemption pattern (repeatable)', + ' --activate set as active task after creation', + ' --force overwrite an existing manifest', + ].join('\n')); + process.exit(0); + default: + if (a.startsWith('-')) bail(`unknown flag: ${a}`); + if (!out.id) { out.id = a; break; } + bail(`unexpected positional arg: ${a}`); + } + } + return out; +} + +function create(argv) { + const opts = parseCreateArgs(argv); + if (!opts.id) bail('usage: task create --description "..." --allowed "" [...]'); + if (!/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(opts.id)) { + bail(`invalid id: ${opts.id} (must match /^[a-z0-9][a-z0-9-_.]{0,63}$/)`); + } + const manifestPath = resolve(tasksDir, `${opts.id}.json`); + if (existsSync(manifestPath) && !opts.force) { + bail(`manifest already exists: ${manifestPath}\n (pass --force to overwrite)`); + } + const inherits = opts.inherits !== null + ? opts.inherits + : (listTasks(root).includes('base') && opts.id !== 'base' ? ['base'] : []); + + if (opts.allowed.length === 0 && inherits.length === 0) { + bail('at least one --allowed pattern is required (unless --inherits)'); + } + + const manifest = { + id: opts.id, + description: opts.description || undefined, + owner: opts.owner || undefined, + created: new Date().toISOString(), + inherits: inherits.length ? inherits : undefined, + allowed: opts.allowed.length ? opts.allowed : undefined, + exemptions: opts.exemptions.length ? opts.exemptions : undefined, + notes: opts.notes || undefined, + }; + const cleaned = Object.fromEntries(Object.entries(manifest).filter(([,v]) => v !== undefined)); + const errs = validateManifest(cleaned, opts.id); + if (errs.length) bail(`invalid manifest:\n - ${errs.join('\n - ')}`); + + writeFileSync(manifestPath, JSON.stringify(cleaned, null, 2) + '\n', 'utf8'); + console.log(`Created ${manifestPath}`); + + if (opts.activate) { + loadTask(root, opts.id); + writeFileSync(activeFile, `${opts.id}\n`, 'utf8'); + console.log(`Active task set: ${opts.id}`); + } else { + console.log(`Activate with: pnpm task set ${opts.id}`); + } + bootstrapWarning(); +} + +function validate(one) { + const ids = one ? [one] : listTasks(root); + if (!ids.length) { console.log('(no manifests found)'); return; } + let failed = 0; + for (const id of ids) { + const file = resolve(tasksDir, `${id}.json`); + if (!existsSync(file)) { console.error(`missing: ${file}`); failed++; continue; } + let raw, parsed; + try { raw = readFileSync(file, 'utf8'); } catch (e) { console.error(`${id}: cannot read (${e.message})`); failed++; continue; } + try { parsed = JSON.parse(raw); } catch (e) { console.error(`${id}: invalid JSON (${e.message})`); failed++; continue; } + const errs = validateManifest(parsed, id); + if (errs.length) { + failed++; + console.error(`${id}: INVALID`); + for (const err of errs) console.error(` - ${err}`); + continue; + } + try { + loadTask(root, id); + console.log(`${id}: ok`); + } catch (e) { + failed++; + console.error(`${id}: INVALID (inheritance)`); + console.error(` - ${e.message}`); + } + } + if (failed) process.exit(1); +} + +function audit(args) { + let limit = 50; + for (let i = 0; i < args.length; i++) { + const a = args[i]; + if (a === '--since' || a === '-n') { limit = parseInt(args[++i], 10) || 50; } + else if (a === '-h' || a === '--help') { console.log('usage: task audit [--since N]'); return; } + } + if (!existsSync(logsFile)) { console.log('(no denials logged)'); return; } + const lines = readFileSync(logsFile, 'utf8').split('\n').filter(Boolean); + const tail = lines.slice(-limit); + for (const line of tail) { + try { + const r = JSON.parse(line); + console.log(`${r.ts} ${(r.event || '-').padEnd(26)} ${(r.task || '-').padEnd(20)} ${r.path || r.command || ''}`); + } catch { + console.log(line); + } + } + console.log(`\n(${tail.length} of ${lines.length} entries)`); +} + +function resolveDebug() { + console.log(`repo root: ${root}`); + console.log(`env: AGENT_SCOPE_TASK=${process.env.AGENT_SCOPE_TASK || '(unset)'}`); + console.log(` AGENT_SCOPE_BOOTSTRAP=${process.env.AGENT_SCOPE_BOOTSTRAP || '(unset)'}`); + console.log(`bootstrap: ${isBootstrapActive(root) ? 'ACTIVE' : 'inactive'} (token: ${existsSync(bootstrapToken) ? 'present' : 'absent'})`); + const activeStr = existsSync(activeFile) ? readFileSync(activeFile, 'utf8').trim() : '(none)'; + console.log(`file: ${activeFile} → ${activeStr}`); + const { id, source } = resolveActiveTaskId(root); + console.log(`resolved: ${id || '(none)'} (source: ${source})`); +} + +// --------------------------------------------------------------------------- + +const [cmd, ...rest] = process.argv.slice(2); +try { + switch (cmd) { + case 'list': list(); break; + case 'show': + case undefined: + case null: + case '': show(); break; + case 'set': set(rest[0]); break; + case 'clear': clear(); break; + case 'check': check(rest[0]); break; + case 'init': await init(rest[0]); break; + case 'start': start(); break; + case 'create': create(rest); break; + case 'validate': validate(rest[0]); break; + case 'audit': audit(rest); break; + case 'resolve': resolveDebug(); break; + case '-h': case '--help': case 'help': + console.log([ + 'usage: task [args]', + '', + ' start begin guided onboarding (prints a chat trigger)', + ' list list available task manifests', + ' show show the active task and its scope', + ' set set the active task', + ' clear clear the active task', + ' check check a path against the active task', + ' init create a new task manifest interactively', + ' create [...] create a manifest non-interactively (flags)', + ' validate [] validate one or all manifests', + ' audit [--since N] show recent denials from the audit log', + ' resolve debug: show how the active task is resolved', + ].join('\n')); + break; + default: + bail(`unknown command: ${cmd}\nrun: task --help`, 2); + } +} catch (e) { + bail(e.message); +} diff --git a/agent-scope/hooks/pre-commit b/agent-scope/hooks/pre-commit new file mode 100755 index 000000000..fb8f4b3bf --- /dev/null +++ b/agent-scope/hooks/pre-commit @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# agent-scope pre-commit hook. Blocks commits that touch out-of-scope files. +# +# Install with: +# bash agent-scope/bin/install-git-hooks.sh +# Skip once (emergencies only): +# AGENT_SCOPE_SKIP=1 git commit ... + +set -euo pipefail + +if [[ "${AGENT_SCOPE_SKIP:-}" == "1" ]]; then + echo "agent-scope: pre-commit skipped (AGENT_SCOPE_SKIP=1)" >&2 + exit 0 +fi + +# Discover repo root robustly (hook may be called from any cwd). +repo_root=$(git rev-parse --show-toplevel) +cd "$repo_root" + +# Collect staged files (added/copied/modified/renamed). Zero-delimited for safety. +mapfile -d '' -t staged < <(git diff --cached --name-only --diff-filter=ACMR -z 2>/dev/null || true) + +if [[ ${#staged[@]} -eq 0 ]]; then + exit 0 +fi + +# Hardcoded refusal: never let the bootstrap token slip into a commit. +for p in "${staged[@]}"; do + if [[ "$p" == "agent-scope/.bootstrap-token" ]]; then + echo "agent-scope: REFUSING to commit agent-scope/.bootstrap-token." >&2 + echo " This file disables hardcoded path protection. Unstage it:" >&2 + echo " git restore --staged agent-scope/.bootstrap-token" >&2 + exit 1 + fi +done + +# Feed staged paths into scope-check via stdin. +# Exit codes: 0 = all allowed/exempt, 1 = at least one denied, 2 = usage error. +if printf '%s\n' "${staged[@]}" | node agent-scope/bin/scope-check.mjs --stdin; then + exit 0 +else + rc=$? + echo "" >&2 + echo "agent-scope: pre-commit blocked $rc-coded check — out-of-task files are staged." >&2 + echo "Options:" >&2 + echo " 1. Unstage them: git restore --staged " >&2 + echo " 2. Add the paths to the active task:" >&2 + echo " \$EDITOR agent-scope/tasks/\$(node agent-scope/bin/task.mjs show | head -1 | awk '{print \$NF}').json" >&2 + echo " 3. Commit under a different task: node agent-scope/bin/task.mjs set " >&2 + echo " 4. Emergency override (leaves an audit trail): AGENT_SCOPE_SKIP=1 git commit ..." >&2 + exit 1 +fi diff --git a/agent-scope/lib/denial.mjs b/agent-scope/lib/denial.mjs new file mode 100644 index 000000000..c5ec882cc --- /dev/null +++ b/agent-scope/lib/denial.mjs @@ -0,0 +1,409 @@ +// Builds structured denial payloads for every agent-scope enforcement layer. +// Each denial carries both a human-readable prose block AND a machine-readable +// JSON block delimited by the `agent-scope-menu` fence. Agents are instructed +// (via CLAUDE.md + .cursor/rules/agent-scope.mdc) to parse the JSON and surface +// the `options` array via AskQuestion — the plan-mode equivalent for denials. +// +// Zero IO, zero deps. Pure functions; unit-testable. + +import { listTasks, loadTask, checkPath, PROTECTED_PATTERNS } from './scope.mjs'; + +export const DENIAL_FENCE_START = ''; +export const DENIAL_FENCE_END = ''; + +// --------------------------------------------------------------------------- +// Suggestion heuristics +// --------------------------------------------------------------------------- + +// Propose a single representative glob for a denied path. Conservative: covers +// the immediate parent directory's subtree. Callers can suggest tighter globs +// interactively if the user prefers. +export function suggestGlob(relPath) { + if (typeof relPath !== 'string' || !relPath) return null; + const clean = relPath.replace(/\/+$/, ''); + const slash = clean.lastIndexOf('/'); + if (slash < 0) return clean; + const dir = clean.slice(0, slash); + return `${dir}/**`; +} + +// Propose a tighter glob targeting the exact basename stem (same directory, +// any extension). Useful when the agent is likely to touch sibling files. +export function suggestTightGlob(relPath) { + if (typeof relPath !== 'string' || !relPath) return null; + const clean = relPath.replace(/\/+$/, ''); + const slash = clean.lastIndexOf('/'); + const base = slash >= 0 ? clean.slice(slash + 1) : clean; + const dot = base.indexOf('.'); + const stem = dot > 0 ? base.slice(0, dot) : base; + if (!stem) return null; + const dir = slash >= 0 ? clean.slice(0, slash) : ''; + return dir ? `${dir}/${stem}*` : `${stem}*`; +} + +// Find other task manifests whose scope already covers the denied path. +// Skips the currently-active task. Protected paths have no alternatives. +export function findAlternativeTasks(relPath, root, excludeTaskId = null) { + if (!relPath || !root) return []; + const out = []; + let ids = []; + try { ids = listTasks(root); } catch { return []; } + for (const id of ids) { + if (id === excludeTaskId) continue; + let t; + try { t = loadTask(root, id); } catch { continue; } + let d; + try { d = checkPath(t, relPath, root); } catch { continue; } + if (d === 'allow' || d === 'exempt') { + out.push({ id, description: t.description || '' }); + } + } + return out; +} + +// --------------------------------------------------------------------------- +// Option menus +// --------------------------------------------------------------------------- + +// A free-text fallback. Included in every menu so the user can bypass the +// presets entirely. When picked, the agent asks the user to describe what to +// do next as a regular chat message. +const CUSTOM_OPTION = { + id: 'custom_instruction', + label: 'Let me type my own instruction', + action: { kind: 'custom' }, +}; + +// Menu for out-of-scope write denials (path is in the repo but not in scope). +export function buildOutOfScopeOptions({ deniedPath, activeTaskId, alternatives }) { + const opts = [ + { + id: 'add_file', + label: `Add "${deniedPath}" to ${activeTaskId}'s manifest`, + action: { kind: 'add_to_manifest', task: activeTaskId, patterns: [deniedPath] }, + }, + { + id: 'add_glob', + label: `Add "${suggestGlob(deniedPath)}" to ${activeTaskId}'s manifest`, + action: { kind: 'add_to_manifest', task: activeTaskId, patterns: [suggestGlob(deniedPath)] }, + }, + ]; + if (Array.isArray(alternatives) && alternatives.length) { + for (const alt of alternatives.slice(0, 3)) { + opts.push({ + id: `switch_task_${alt.id}`, + label: `Switch active task to "${alt.id}"` + (alt.description ? ` — ${alt.description}` : ''), + action: { kind: 'switch_task', task: alt.id }, + }); + } + } + opts.push( + { id: 'skip', label: 'Skip this edit, keep working on in-scope files', action: { kind: 'skip' } }, + { id: 'cancel', label: 'Cancel this turn — the edit should not happen', action: { kind: 'cancel' } }, + CUSTOM_OPTION, + ); + return opts; +} + +// Menu for protected-path denials — only the human can unlock. +export function buildProtectedOptions({ deniedPath }) { + return [ + { + id: 'bootstrap', + label: 'I need to modify agent-scope itself — please enable bootstrap', + action: { + kind: 'bootstrap', + instruction: 'Run in your own terminal: touch agent-scope/.bootstrap-token (then tell me "go"). When done, rm agent-scope/.bootstrap-token to re-lock.', + }, + }, + { id: 'skip', label: 'Skip this edit, it was not essential', action: { kind: 'skip' } }, + { id: 'cancel', label: 'Cancel this turn — I should not touch system files', action: { kind: 'cancel' } }, + CUSTOM_OPTION, + ]; +} + +// Menu for manifest load errors — the task file is broken. +export function buildLoadErrorOptions({ taskId, error }) { + return [ + { + id: 'fix_manifest', + label: `Open and fix agent-scope/tasks/${taskId}.json`, + action: { kind: 'fix_manifest', task: taskId, error }, + }, + { + id: 'clear_task', + label: 'Clear the active task for now (pnpm task clear)', + action: { kind: 'clear_task' }, + }, + { id: 'cancel', label: 'Cancel this turn', action: { kind: 'cancel' } }, + CUSTOM_OPTION, + ]; +} + +// Pick a sensible default for the highlighted option. Agents are instructed +// to respect this when surfacing the menu via AskQuestion, but it's only a +// recommendation — the user is always free to choose anything. +function recommendFor(reason, options) { + const ids = new Set(options.map(o => o.id)); + if (reason === 'out-of-scope') { + if (ids.has('add_glob')) return 'add_glob'; + if (ids.has('add_file')) return 'add_file'; + } + if (reason === 'protected') { + return 'cancel'; // safest default; user opts into bootstrap deliberately + } + if (reason === 'manifest-load-error') { + if (ids.has('fix_manifest')) return 'fix_manifest'; + } + return options[0]?.id || null; +} + +// --------------------------------------------------------------------------- +// Full denial message builders (prose + structured block) +// --------------------------------------------------------------------------- + +function wrapStructured(payload) { + return [ + DENIAL_FENCE_START, + JSON.stringify(payload, null, 2), + DENIAL_FENCE_END, + ].join('\n'); +} + +// Emit the human-readable prose and append the machine-readable JSON block. +// Agents are expected to find the fence and call AskQuestion with `options`. +function render(prose, structured) { + return [ + prose.trim(), + '', + '⇣ Plan-mode menu (agent: surface these options via AskQuestion):', + wrapStructured(structured), + ].join('\n'); +} + +// Build a preToolUse denial message. +export function buildPreToolUseDenial({ + tool, deniedPath, decision, task, taskId, root, +}) { + if (decision === 'protected') { + const options = buildProtectedOptions({ deniedPath }); + const structured = { + version: 1, + hook: 'preToolUse', + reason: 'protected', + tool, + deniedPath, + activeTask: taskId || null, + protectedPatterns: [...PROTECTED_PATTERNS], + options, + recommendedOptionId: recommendFor('protected', options), + agentReasoning: null, + }; + const prose = [ + `PROTECTED PATH — ${tool} blocked by agent-scope system policy.`, + ` Path: ${deniedPath}`, + ``, + `This path is part of the agent-scope enforcement system itself. Modifying`, + `it would weaken the guard, so it's blocked regardless of the active task.`, + ``, + `If this change is legitimate (improving agent-scope itself), ask the user`, + `to enable bootstrap: \`touch agent-scope/.bootstrap-token\` in their own`, + `terminal. Reminder: bootstrap disables protection for the whole session.`, + ].join('\n'); + return { message: render(prose, structured), structured }; + } + + // out-of-scope (deny) + const alternatives = findAlternativeTasks(deniedPath, root, taskId); + const options = buildOutOfScopeOptions({ deniedPath, activeTaskId: taskId, alternatives }); + const positives = ((task && task.allowed) || []).filter(p => !p.startsWith('!')); + const exemptions = ((task && task.exemptions) || []).filter(p => !p.startsWith('!')); + const structured = { + version: 1, + hook: 'preToolUse', + reason: 'out-of-scope', + tool, + deniedPath, + activeTask: taskId || null, + activeTaskDescription: (task && task.description) || null, + allowed: positives, + exemptions, + suggestedGlob: suggestGlob(deniedPath), + suggestedTightGlob: suggestTightGlob(deniedPath), + alternativeTasks: alternatives, + options, + recommendedOptionId: recommendFor('out-of-scope', options), + agentReasoning: null, + }; + const prose = [ + `OUT OF TASK SCOPE — ${tool} blocked by agent-scope.`, + ` Active task: ${taskId}${task && task.description ? ` — ${task.description}` : ''}`, + ` Denied path: ${deniedPath}`, + ``, + `This task only permits writes matching:`, + ...(positives.length ? positives.map(p => ` - ${p}`) : [' (nothing — manifest has no positive allows)']), + ...(exemptions.length ? ['', 'Plus always-allowed exemptions:', ...exemptions.map(p => ` - ${p}`)] : []), + ``, + `STOP. Do not retry via another tool or a different command form. Use the`, + `plan-mode menu below to ask the user how to proceed.`, + ].join('\n'); + return { message: render(prose, structured), structured }; +} + +// Build a manifest-load-error denial message. +export function buildLoadErrorDenial({ taskId, error }) { + const options = buildLoadErrorOptions({ taskId, error }); + const structured = { + version: 1, + hook: 'preToolUse', + reason: 'manifest-load-error', + activeTask: taskId, + error, + options, + recommendedOptionId: recommendFor('manifest-load-error', options), + agentReasoning: null, + }; + const prose = [ + `agent-scope: failed to load active task manifest "${taskId}".`, + ` Error: ${error}`, + ``, + `Fix agent-scope/tasks/${taskId}.json or clear the active task.`, + ].join('\n'); + return { message: render(prose, structured), structured }; +} + +// Build a beforeShellExecution denial message from a set of violations. +// A violation is { sub, cmd, path, decision }. +export function buildShellPrecheckDenial({ + command, violations, task, taskId, root, +}) { + const anyProtected = violations.some(v => String(v.decision).startsWith('protected')); + // Use the first out-of-scope path (if any) to seed the menu; if everything + // is protected, show the protected menu. If mixed, protected wins because + // the user needs bootstrap before we can address scope fixes. + let reason, options, suggestedFix; + const firstScopePath = violations.find(v => v.decision === 'deny')?.path || null; + const firstProtPath = violations.find(v => String(v.decision).startsWith('protected'))?.path || null; + + if (anyProtected) { + reason = 'protected'; + options = buildProtectedOptions({ deniedPath: firstProtPath || '(protected target)' }); + suggestedFix = 'enable bootstrap — see options'; + } else if (firstScopePath) { + reason = 'out-of-scope'; + const alternatives = findAlternativeTasks(firstScopePath, root, taskId); + options = buildOutOfScopeOptions({ + deniedPath: firstScopePath, activeTaskId: taskId, alternatives, + }); + suggestedFix = `add "${suggestGlob(firstScopePath)}" to ${taskId}'s manifest`; + } else { + reason = 'unknown'; + options = [ + { id: 'skip', label: 'Skip this command', action: { kind: 'skip' } }, + { id: 'cancel', label: 'Cancel this turn', action: { kind: 'cancel' } }, + CUSTOM_OPTION, + ]; + suggestedFix = null; + } + + const structured = { + version: 1, + hook: 'beforeShellExecution', + reason, + command, + activeTask: taskId || null, + violations: violations.map(v => ({ + cmd: v.cmd, path: v.path, decision: v.decision, + })), + suggestedFix, + options, + recommendedOptionId: recommendFor(reason, options), + agentReasoning: null, + }; + + const prose = [ + `Destructive shell command blocked by agent-scope pre-shell guard.`, + ` Active task: ${task ? task.id : '(none — only system protection applies)'}`, + ``, + `Violations:`, + ...violations.map(v => ` - ${v.cmd} ${v.path} [${v.decision}]`), + ``, + `STOP. The post-exec backstop would revert tracked files and delete`, + `untracked ones in denied paths anyway; use the menu below instead of`, + `retrying with a different command form.`, + ].join('\n'); + return { message: render(prose, structured), structured }; +} + +// Build an afterShellExecution context message. Unlike the other two this +// isn't a deny — the shell already ran. Files were reverted/deleted. Still +// emit a plan-mode menu so the agent surfaces the "what now?" question. +export function buildAfterShellContext({ + command, task, taskId, root, + reverted, deleted, unreverted, +}) { + reverted = Array.isArray(reverted) ? reverted : []; + deleted = Array.isArray(deleted) ? deleted : []; + unreverted = Array.isArray(unreverted) ? unreverted : []; + + const touched = [...reverted, ...deleted]; + const firstProtected = touched.find(p => { + for (const pat of PROTECTED_PATTERNS) { + const re = new RegExp('^' + pat.replace(/[.+^${}()|[\]\\]/g, '\\$&').replace(/\*\*/g, '.*').replace(/\*/g, '[^/]*') + '$'); + if (re.test(p)) return true; + } + return false; + }); + + let options, reason; + if (firstProtected) { + reason = 'protected'; + options = buildProtectedOptions({ deniedPath: firstProtected }); + } else if (touched.length && taskId) { + reason = 'out-of-scope'; + const alternatives = findAlternativeTasks(touched[0], root, taskId); + options = buildOutOfScopeOptions({ + deniedPath: touched[0], activeTaskId: taskId, alternatives, + }); + } else { + reason = 'unknown'; + options = [ + { id: 'acknowledge', label: 'Acknowledged — continue with other work', action: { kind: 'skip' } }, + { id: 'cancel', label: 'Cancel this turn', action: { kind: 'cancel' } }, + CUSTOM_OPTION, + ]; + } + + const structured = { + version: 1, + hook: 'afterShellExecution', + reason, + command, + activeTask: taskId || null, + reverted, + deleted, + unreverted: unreverted.map(u => ({ path: u.path, status: u.status, reason: u.reason })), + options, + recommendedOptionId: recommendFor(reason, options), + agentReasoning: null, + }; + + const lines = [ + `agent-scope: shell command modified out-of-task or protected files` + + (task ? ` (task: ${task.id}).` : ' (no active task — only protected paths enforced).'), + ]; + if (reverted.length) { + lines.push('', 'Reverted via `git checkout --`:'); + for (const p of reverted) lines.push(` - ${p}`); + } + if (deleted.length) { + lines.push('', 'Deleted (untracked, not allowed to persist):'); + for (const p of deleted) lines.push(` - ${p}`); + } + if (unreverted.length) { + lines.push('', 'Could NOT revert (please review manually):'); + for (const u of unreverted) lines.push(` - ${u.path} [${u.status}] ${u.reason}`); + } + + return { message: render(lines.join('\n'), structured), structured }; +} diff --git a/agent-scope/lib/denial.test.mjs b/agent-scope/lib/denial.test.mjs new file mode 100644 index 000000000..6485697ff --- /dev/null +++ b/agent-scope/lib/denial.test.mjs @@ -0,0 +1,525 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +import { + suggestGlob, suggestTightGlob, findAlternativeTasks, + buildOutOfScopeOptions, buildProtectedOptions, buildLoadErrorOptions, + buildPreToolUseDenial, buildLoadErrorDenial, + buildShellPrecheckDenial, buildAfterShellContext, + DENIAL_FENCE_START, DENIAL_FENCE_END, +} from './denial.mjs'; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeTempRepo() { + const root = mkdtempSync(join(tmpdir(), 'as-denial-')); + mkdirSync(join(root, 'agent-scope/tasks'), { recursive: true }); + mkdirSync(join(root, 'agent-scope/lib'), { recursive: true }); + return root; +} +function writeTask(root, id, manifest) { + writeFileSync( + join(root, 'agent-scope/tasks', `${id}.json`), + JSON.stringify({ id, description: manifest.description || '', ...manifest }, null, 2) + ); +} +function cleanup(root) { rmSync(root, { recursive: true, force: true }); } + +function extractJson(message) { + const start = message.indexOf(DENIAL_FENCE_START); + const end = message.indexOf(DENIAL_FENCE_END); + assert.ok(start >= 0, 'message has begin fence'); + assert.ok(end > start, 'message has end fence'); + const body = message.slice(start + DENIAL_FENCE_START.length, end).trim(); + return JSON.parse(body); +} + +// --------------------------------------------------------------------------- +// suggestGlob +// --------------------------------------------------------------------------- + +test('suggestGlob: typical nested file', () => { + assert.equal(suggestGlob('packages/foo/src/bar.ts'), 'packages/foo/src/**'); +}); + +test('suggestGlob: top-level file', () => { + assert.equal(suggestGlob('README.md'), 'README.md'); +}); + +test('suggestGlob: empty / invalid', () => { + assert.equal(suggestGlob(''), null); + assert.equal(suggestGlob(undefined), null); + assert.equal(suggestGlob(null), null); + assert.equal(suggestGlob(42), null); +}); + +test('suggestGlob: trailing slash is stripped', () => { + assert.equal(suggestGlob('packages/foo/src/'), 'packages/foo/**'); +}); + +// --------------------------------------------------------------------------- +// suggestTightGlob +// --------------------------------------------------------------------------- + +test('suggestTightGlob: basename stem + sibling extensions', () => { + assert.equal(suggestTightGlob('packages/foo/src/bar.ts'), 'packages/foo/src/bar*'); +}); + +test('suggestTightGlob: multi-dot filename uses first-dot stem', () => { + assert.equal(suggestTightGlob('packages/foo/bar.test.ts'), 'packages/foo/bar*'); +}); + +test('suggestTightGlob: extensionless', () => { + assert.equal(suggestTightGlob('scripts/build'), 'scripts/build*'); +}); + +test('suggestTightGlob: dotfile keeps the full basename', () => { + // leading-dot filenames have no conventional "stem + ext" split; use as-is + assert.equal(suggestTightGlob('.env'), '.env*'); +}); + +test('suggestTightGlob: empty input returns null', () => { + assert.equal(suggestTightGlob(''), null); + assert.equal(suggestTightGlob(undefined), null); +}); + +// --------------------------------------------------------------------------- +// findAlternativeTasks +// --------------------------------------------------------------------------- + +test('findAlternativeTasks: finds a task that covers the path', () => { + const root = makeTempRepo(); + try { + writeTask(root, 'staking', { description: 'Staking work', allowed: ['packages/evm-module/contracts/**'] }); + writeTask(root, 'sync', { description: 'Sync work', allowed: ['packages/sync/**'] }); + const r = findAlternativeTasks('packages/evm-module/contracts/Stk.sol', root, 'sync'); + assert.equal(r.length, 1); + assert.equal(r[0].id, 'staking'); + assert.equal(r[0].description, 'Staking work'); + } finally { cleanup(root); } +}); + +test('findAlternativeTasks: excludes the current task', () => { + const root = makeTempRepo(); + try { + writeTask(root, 'wide', { allowed: ['**/*'] }); + const r = findAlternativeTasks('any/file.ts', root, 'wide'); + assert.equal(r.length, 0); + } finally { cleanup(root); } +}); + +test('findAlternativeTasks: returns [] when no manifests match', () => { + const root = makeTempRepo(); + try { + writeTask(root, 'narrow', { allowed: ['packages/only/**'] }); + const r = findAlternativeTasks('totally/unrelated/file.ts', root, null); + assert.equal(r.length, 0); + } finally { cleanup(root); } +}); + +test('findAlternativeTasks: skips broken manifests silently', () => { + const root = makeTempRepo(); + try { + writeTask(root, 'good', { allowed: ['**/*'] }); + writeFileSync(join(root, 'agent-scope/tasks/broken.json'), '{ not valid json'); + const r = findAlternativeTasks('x/y.ts', root, null); + assert.equal(r.length, 1); + assert.equal(r[0].id, 'good'); + } finally { cleanup(root); } +}); + +// --------------------------------------------------------------------------- +// buildOutOfScopeOptions +// --------------------------------------------------------------------------- + +test('buildOutOfScopeOptions: base menu has add_file, add_glob, skip, cancel, custom_instruction', () => { + const opts = buildOutOfScopeOptions({ + deniedPath: 'packages/foo/bar.ts', activeTaskId: 'my-task', alternatives: [], + }); + const ids = opts.map(o => o.id); + assert.ok(ids.includes('add_file')); + assert.ok(ids.includes('add_glob')); + assert.ok(ids.includes('skip')); + assert.ok(ids.includes('cancel')); + assert.ok(ids.includes('custom_instruction')); +}); + +test('buildOutOfScopeOptions: custom_instruction is the free-text fallback', () => { + const opts = buildOutOfScopeOptions({ + deniedPath: 'x/y.ts', activeTaskId: 't', alternatives: [], + }); + const custom = opts.find(o => o.id === 'custom_instruction'); + assert.ok(custom, 'custom option present'); + assert.equal(custom.action.kind, 'custom'); + assert.match(custom.label, /type/i); +}); + +test('buildOutOfScopeOptions: add_file action has the exact path', () => { + const opts = buildOutOfScopeOptions({ + deniedPath: 'packages/foo/bar.ts', activeTaskId: 'my-task', alternatives: [], + }); + const addFile = opts.find(o => o.id === 'add_file'); + assert.equal(addFile.action.kind, 'add_to_manifest'); + assert.equal(addFile.action.task, 'my-task'); + assert.deepEqual(addFile.action.patterns, ['packages/foo/bar.ts']); +}); + +test('buildOutOfScopeOptions: add_glob uses suggestGlob', () => { + const opts = buildOutOfScopeOptions({ + deniedPath: 'packages/foo/bar.ts', activeTaskId: 't', alternatives: [], + }); + const addGlob = opts.find(o => o.id === 'add_glob'); + assert.deepEqual(addGlob.action.patterns, ['packages/foo/**']); +}); + +test('buildOutOfScopeOptions: switch options are added per alternative (max 3)', () => { + const alternatives = [ + { id: 'a', description: 'A' }, + { id: 'b', description: 'B' }, + { id: 'c', description: 'C' }, + { id: 'd', description: 'D' }, + ]; + const opts = buildOutOfScopeOptions({ + deniedPath: 'x/y.ts', activeTaskId: 't', alternatives, + }); + const switchIds = opts.filter(o => o.id.startsWith('switch_task_')).map(o => o.id); + assert.equal(switchIds.length, 3); + assert.deepEqual(switchIds, ['switch_task_a', 'switch_task_b', 'switch_task_c']); +}); + +// --------------------------------------------------------------------------- +// buildProtectedOptions +// --------------------------------------------------------------------------- + +test('buildProtectedOptions: bootstrap + skip + cancel + custom_instruction', () => { + const opts = buildProtectedOptions({ deniedPath: '.cursor/hooks/x.mjs' }); + assert.deepEqual( + opts.map(o => o.id), + ['bootstrap', 'skip', 'cancel', 'custom_instruction'], + ); + assert.equal(opts[0].action.kind, 'bootstrap'); + assert.ok(opts[0].action.instruction.includes('bootstrap-token')); +}); + +// --------------------------------------------------------------------------- +// buildLoadErrorOptions +// --------------------------------------------------------------------------- + +test('buildLoadErrorOptions: fix, clear, cancel, custom_instruction', () => { + const opts = buildLoadErrorOptions({ taskId: 'broken', error: 'syntax' }); + assert.deepEqual( + opts.map(o => o.id), + ['fix_manifest', 'clear_task', 'cancel', 'custom_instruction'], + ); + assert.equal(opts[0].action.task, 'broken'); +}); + +// --------------------------------------------------------------------------- +// buildPreToolUseDenial +// --------------------------------------------------------------------------- + +test('buildPreToolUseDenial: protected → structured protected menu', () => { + const root = makeTempRepo(); + try { + const { message, structured } = buildPreToolUseDenial({ + tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', + decision: 'protected', task: null, taskId: null, root, + }); + const parsed = extractJson(message); + assert.equal(parsed.hook, 'preToolUse'); + assert.equal(parsed.reason, 'protected'); + assert.equal(parsed.deniedPath, '.cursor/hooks/x.mjs'); + assert.ok(parsed.protectedPatterns.length > 0); + assert.deepEqual( + parsed.options.map(o => o.id), + ['bootstrap', 'skip', 'cancel', 'custom_instruction'], + ); + assert.equal(parsed.recommendedOptionId, 'cancel'); + assert.equal(parsed.agentReasoning, null, 'agent fills this in when surfacing'); + assert.equal(structured.reason, 'protected'); + assert.ok(message.includes('PROTECTED PATH')); + } finally { cleanup(root); } +}); + +test('buildPreToolUseDenial: out-of-scope → full metadata + alternatives', () => { + const root = makeTempRepo(); + try { + writeTask(root, 'staking', { description: 'stk', allowed: ['packages/evm-module/**'] }); + const task = { id: 'sync', description: 'Sync', + allowed: ['packages/sync/**'], exemptions: ['**/dist/**'] }; + const { message } = buildPreToolUseDenial({ + tool: 'StrReplace', deniedPath: 'packages/evm-module/contracts/S.sol', + decision: 'deny', task, taskId: 'sync', root, + }); + const p = extractJson(message); + assert.equal(p.reason, 'out-of-scope'); + assert.equal(p.deniedPath, 'packages/evm-module/contracts/S.sol'); + assert.equal(p.activeTask, 'sync'); + assert.deepEqual(p.allowed, ['packages/sync/**']); + assert.deepEqual(p.exemptions, ['**/dist/**']); + assert.equal(p.suggestedGlob, 'packages/evm-module/contracts/**'); + assert.equal(p.alternativeTasks.length, 1); + assert.equal(p.alternativeTasks[0].id, 'staking'); + const ids = p.options.map(o => o.id); + assert.ok(ids.includes('add_file')); + assert.ok(ids.includes('switch_task_staking')); + assert.ok(ids.includes('custom_instruction')); + assert.equal(p.recommendedOptionId, 'add_glob'); + assert.equal(p.agentReasoning, null); + assert.ok(message.includes('OUT OF TASK SCOPE')); + } finally { cleanup(root); } +}); + +test('buildPreToolUseDenial: message has both fences and is JSON-parseable', () => { + const root = makeTempRepo(); + try { + const { message } = buildPreToolUseDenial({ + tool: 'Write', deniedPath: '.cursor/hooks/y.mjs', + decision: 'protected', task: null, taskId: null, root, + }); + assert.ok(message.includes(DENIAL_FENCE_START)); + assert.ok(message.includes(DENIAL_FENCE_END)); + const p = extractJson(message); + assert.equal(p.version, 1); + } finally { cleanup(root); } +}); + +// --------------------------------------------------------------------------- +// buildLoadErrorDenial +// --------------------------------------------------------------------------- + +test('buildLoadErrorDenial: structured with error + menu', () => { + const { message, structured } = buildLoadErrorDenial({ + taskId: 'my-task', error: 'Unexpected token', + }); + const p = extractJson(message); + assert.equal(p.hook, 'preToolUse'); + assert.equal(p.reason, 'manifest-load-error'); + assert.equal(p.activeTask, 'my-task'); + assert.equal(p.error, 'Unexpected token'); + assert.deepEqual( + p.options.map(o => o.id), + ['fix_manifest', 'clear_task', 'cancel', 'custom_instruction'], + ); + assert.equal(p.recommendedOptionId, 'fix_manifest'); + assert.equal(structured.error, 'Unexpected token'); +}); + +// --------------------------------------------------------------------------- +// buildShellPrecheckDenial +// --------------------------------------------------------------------------- + +test('buildShellPrecheckDenial: protected violation → protected menu', () => { + const root = makeTempRepo(); + try { + const task = null; + const violations = [ + { sub: 'rm -rf .cursor/hooks', cmd: 'rm', path: '.cursor/hooks', decision: 'protected (covers)' }, + ]; + const { message } = buildShellPrecheckDenial({ + command: 'rm -rf .cursor/hooks', violations, task, taskId: null, root, + }); + const p = extractJson(message); + assert.equal(p.hook, 'beforeShellExecution'); + assert.equal(p.reason, 'protected'); + assert.equal(p.command, 'rm -rf .cursor/hooks'); + assert.equal(p.violations.length, 1); + assert.deepEqual( + p.options.map(o => o.id), + ['bootstrap', 'skip', 'cancel', 'custom_instruction'], + ); + assert.equal(p.recommendedOptionId, 'cancel'); + } finally { cleanup(root); } +}); + +test('buildShellPrecheckDenial: pure out-of-scope → full menu', () => { + const root = makeTempRepo(); + try { + writeTask(root, 'other', { allowed: ['packages/evm-module/**'] }); + const task = { id: 'sync', allowed: ['packages/sync/**'] }; + const violations = [ + { sub: 'rm packages/evm-module/contracts/x.sol', cmd: 'rm', + path: 'packages/evm-module/contracts/x.sol', decision: 'deny' }, + ]; + const { message } = buildShellPrecheckDenial({ + command: 'rm packages/evm-module/contracts/x.sol', + violations, task, taskId: 'sync', root, + }); + const p = extractJson(message); + assert.equal(p.reason, 'out-of-scope'); + assert.equal(p.suggestedFix.includes('packages/evm-module/contracts/**'), true); + const ids = p.options.map(o => o.id); + assert.ok(ids.includes('add_file')); + assert.ok(ids.includes('switch_task_other')); + } finally { cleanup(root); } +}); + +test('buildShellPrecheckDenial: mixed protected+out-of-scope → protected wins', () => { + const root = makeTempRepo(); + try { + const task = { id: 'x', allowed: ['only/**'] }; + const violations = [ + { sub: '1', cmd: 'rm', path: 'other/file.ts', decision: 'deny' }, + { sub: '2', cmd: 'rm', path: '.cursor/hooks/x.mjs', decision: 'protected' }, + ]; + const { message } = buildShellPrecheckDenial({ + command: '...', violations, task, taskId: 'x', root, + }); + const p = extractJson(message); + assert.equal(p.reason, 'protected'); + assert.deepEqual( + p.options.map(o => o.id), + ['bootstrap', 'skip', 'cancel', 'custom_instruction'], + ); + } finally { cleanup(root); } +}); + +// --------------------------------------------------------------------------- +// buildAfterShellContext +// --------------------------------------------------------------------------- + +test('buildAfterShellContext: reverted + deleted in message', () => { + const root = makeTempRepo(); + try { + const { message } = buildAfterShellContext({ + command: 'whatever', task: { id: 'sync' }, taskId: 'sync', root, + reverted: ['packages/other/x.ts'], + deleted: ['.cursor/hooks/bad.mjs'], + unreverted: [], + }); + assert.ok(message.includes('Reverted via')); + assert.ok(message.includes('Deleted (untracked')); + assert.ok(message.includes('packages/other/x.ts')); + assert.ok(message.includes('.cursor/hooks/bad.mjs')); + const p = extractJson(message); + assert.equal(p.hook, 'afterShellExecution'); + assert.equal(p.reason, 'protected'); // protected detected in deleted[] + assert.deepEqual(p.reverted, ['packages/other/x.ts']); + assert.deepEqual(p.deleted, ['.cursor/hooks/bad.mjs']); + } finally { cleanup(root); } +}); + +test('buildAfterShellContext: no protected → out-of-scope menu', () => { + const root = makeTempRepo(); + try { + const { message } = buildAfterShellContext({ + command: 'x', task: { id: 'sync' }, taskId: 'sync', root, + reverted: ['packages/other/x.ts'], + deleted: [], unreverted: [], + }); + const p = extractJson(message); + assert.equal(p.reason, 'out-of-scope'); + assert.ok(p.options.some(o => o.id === 'add_file')); + } finally { cleanup(root); } +}); + +test('buildAfterShellContext: nothing touched → unknown menu', () => { + const root = makeTempRepo(); + try { + const { message } = buildAfterShellContext({ + command: 'x', task: null, taskId: null, root, + reverted: [], deleted: [], unreverted: [], + }); + const p = extractJson(message); + assert.equal(p.reason, 'unknown'); + assert.ok(p.options.some(o => o.id === 'acknowledge')); + } finally { cleanup(root); } +}); + +// --------------------------------------------------------------------------- +// Structural invariants (all builders) +// --------------------------------------------------------------------------- + +test('every builder emits version:1 and well-formed options', () => { + const root = makeTempRepo(); + try { + const cases = [ + buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', + task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), + buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', + decision: 'protected', task: null, taskId: null, root }), + buildLoadErrorDenial({ taskId: 't', error: 'bad' }), + buildShellPrecheckDenial({ command: 'rm x', + violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], + task: { id: 't' }, taskId: 't', root }), + buildAfterShellContext({ command: 'x', + task: { id: 't' }, taskId: 't', root, + reverted: ['a.ts'], deleted: [], unreverted: [] }), + ]; + for (const { message, structured } of cases) { + const p = extractJson(message); + assert.equal(p.version, 1); + assert.ok(Array.isArray(p.options)); + assert.ok(p.options.length >= 2); + for (const opt of p.options) { + assert.ok(typeof opt.id === 'string' && opt.id.length > 0); + assert.ok(typeof opt.label === 'string' && opt.label.length > 0); + assert.ok(opt.action && typeof opt.action.kind === 'string'); + } + assert.equal(structured.version, 1); + } + } finally { cleanup(root); } +}); + +test('every denial builder sets recommendedOptionId to a valid option', () => { + const root = makeTempRepo(); + try { + const cases = [ + buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', + task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), + buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', + decision: 'protected', task: null, taskId: null, root }), + buildLoadErrorDenial({ taskId: 't', error: 'bad' }), + buildShellPrecheckDenial({ command: 'rm x', + violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], + task: { id: 't' }, taskId: 't', root }), + buildAfterShellContext({ command: 'x', + task: { id: 't' }, taskId: 't', root, + reverted: ['a.ts'], deleted: [], unreverted: [] }), + ]; + for (const { message } of cases) { + const p = extractJson(message); + assert.ok( + typeof p.recommendedOptionId === 'string' && p.recommendedOptionId.length, + 'recommendedOptionId is a non-empty string', + ); + const ids = p.options.map(o => o.id); + assert.ok( + ids.includes(p.recommendedOptionId), + `recommended "${p.recommendedOptionId}" must be in the options list`, + ); + assert.equal(p.agentReasoning, null, + 'agentReasoning is a null placeholder the agent fills in via AskQuestion prompt'); + } + } finally { cleanup(root); } +}); + +test('custom_instruction option appears in every denial menu', () => { + const root = makeTempRepo(); + try { + const cases = [ + buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', + task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), + buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', + decision: 'protected', task: null, taskId: null, root }), + buildLoadErrorDenial({ taskId: 't', error: 'bad' }), + buildShellPrecheckDenial({ command: 'rm x', + violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], + task: { id: 't' }, taskId: 't', root }), + buildAfterShellContext({ command: 'x', + task: { id: 't' }, taskId: 't', root, + reverted: ['a.ts'], deleted: [], unreverted: [] }), + ]; + for (const { message } of cases) { + const p = extractJson(message); + const custom = p.options.find(o => o.id === 'custom_instruction'); + assert.ok(custom, 'custom_instruction present in every denial menu'); + assert.equal(custom.action.kind, 'custom'); + } + } finally { cleanup(root); } +}); diff --git a/agent-scope/lib/log.mjs b/agent-scope/lib/log.mjs new file mode 100644 index 000000000..b42ace315 --- /dev/null +++ b/agent-scope/lib/log.mjs @@ -0,0 +1,89 @@ +// Append-only JSONL audit log + optional webhook sink. +// Safe to call from any hook; failure is silent (audit loss > blocking work). + +import { + appendFileSync, mkdirSync, existsSync, statSync, renameSync, readdirSync, unlinkSync, +} from 'node:fs'; +import { resolve } from 'node:path'; + +// Roll over at 5MB and keep up to MAX_ROTATIONS old files. +export const MAX_BYTES = 5 * 1024 * 1024; +export const MAX_ROTATIONS = 5; + +function rotateIfNeeded(file) { + try { + if (!existsSync(file)) return; + const { size } = statSync(file); + if (size < MAX_BYTES) return; + const ts = new Date().toISOString().replace(/[:.]/g, '-'); + renameSync(file, `${file}.${ts}`); + pruneOldRotations(file); + } catch { /* noop */ } +} + +function pruneOldRotations(file) { + try { + const dir = resolve(file, '..'); + const base = file.split('/').pop(); + const rotations = readdirSync(dir) + .filter(f => f.startsWith(base + '.')) + .map(f => ({ f, full: resolve(dir, f) })) + .sort((a, b) => a.f.localeCompare(b.f)); + while (rotations.length > MAX_ROTATIONS) { + const { full } = rotations.shift(); + unlinkSync(full); + } + } catch { /* noop */ } +} + +function writeLine(root, bucket, record) { + try { + const dir = resolve(root, 'agent-scope/logs'); + if (!existsSync(dir)) mkdirSync(dir, { recursive: true }); + const file = resolve(dir, `${bucket}.jsonl`); + rotateIfNeeded(file); + const line = JSON.stringify({ ts: new Date().toISOString(), ...record }) + '\n'; + appendFileSync(file, line, 'utf8'); + } catch { /* never let logging break the hook */ } +} + +export function logDenial(root, record) { + writeLine(root, 'denials', record); + postWebhook('denial', record); +} + +export function logDecision(root, record) { + writeLine(root, 'decisions', record); +} + +// --------------------------------------------------------------------------- +// Optional webhook sink. Activated when AGENT_SCOPE_WEBHOOK is set to an +// http(s) URL. POSTs the event as JSON (fire-and-forget, 1500 ms timeout). +// The receiver can forward into the DKG, Slack, a log aggregator, etc. +// --------------------------------------------------------------------------- + +function postWebhook(event, record) { + const url = process.env.AGENT_SCOPE_WEBHOOK; + if (!url || !/^https?:\/\//.test(url)) return; + if (typeof globalThis.fetch !== 'function') return; // Node < 18 + + const body = JSON.stringify({ + event, + repo: process.env.AGENT_SCOPE_REPO || null, + host: process.env.HOSTNAME || null, + user: process.env.USER || null, + ts: new Date().toISOString(), + ...record, + }); + + try { + const controller = new AbortController(); + const timeout = setTimeout(() => controller.abort(), 1500); + globalThis.fetch(url, { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body, + signal: controller.signal, + }).then(() => clearTimeout(timeout)).catch(() => clearTimeout(timeout)); + } catch { /* noop */ } +} diff --git a/agent-scope/lib/scope.mjs b/agent-scope/lib/scope.mjs new file mode 100644 index 000000000..e4f78ef72 --- /dev/null +++ b/agent-scope/lib/scope.mjs @@ -0,0 +1,415 @@ +// Shared scope-check library. Zero runtime dependencies; must work from +// Cursor hooks, git hooks, CLI, and CI. Node 20+. +// +// Bootstrap modes (disables hardcoded protection): +// 1. env: AGENT_SCOPE_BOOTSTRAP=1 +// 2. file: agent-scope/.bootstrap-token exists +// Token file is itself protected — only the human can create/remove it from +// outside the agent sandbox. Intentional convention: git-visible. + +import { readFileSync, existsSync, readdirSync, statSync } from 'node:fs'; +import { resolve, relative, sep, dirname, isAbsolute } from 'node:path'; +import { execFileSync } from 'node:child_process'; +import { fileURLToPath } from 'node:url'; + +// --------------------------------------------------------------------------- +// Node version check +// --------------------------------------------------------------------------- + +const MIN_NODE_MAJOR = 20; + +export function checkNodeVersion(minMajor = MIN_NODE_MAJOR) { + const m = /^v(\d+)\./.exec(process.version); + const major = m ? parseInt(m[1], 10) : 0; + if (major < minMajor) { + throw new Error( + `agent-scope requires Node ${minMajor}+ but found ${process.version}. ` + + `Update Node (nvm install 22) and retry.` + ); + } +} + +// --------------------------------------------------------------------------- +// Protected paths +// --------------------------------------------------------------------------- + +export const PROTECTED_PATTERNS = [ + '.cursor/hooks/**', + '.cursor/hooks.json', + '.cursor/rules/agent-scope.mdc', + 'agent-scope/lib/**', + 'agent-scope/bin/**', + 'agent-scope/hooks/**', + 'agent-scope/schema/**', + 'agent-scope/tasks/**', + 'agent-scope/active', + 'agent-scope/.bootstrap-token', + '.git/hooks/**', + '.github/workflows/agent-scope.yml', +]; + +function bootstrapActive(root) { + if (process.env.AGENT_SCOPE_BOOTSTRAP === '1') return true; + try { + const p = resolve(root || resolveRepoRoot(), 'agent-scope/.bootstrap-token'); + return existsSync(p); + } catch { return false; } +} + +export function isBootstrapActive(root) { return bootstrapActive(root); } + +export function checkProtected(relPath, root) { + if (!relPath || typeof relPath !== 'string') return 'deny'; + if (bootstrapActive(root)) return 'allow'; + for (const pattern of PROTECTED_PATTERNS) { + if (globToRegex(pattern).test(relPath)) return 'deny'; + } + return 'allow'; +} + +// Returns true if `relPath` is a directory that CONTAINS any protected path +// (i.e. a destructive recursive op against it would wipe protected files). +// Used by the pre-shell hook for `rm -rf `, `find -delete`, etc. +export function coversProtected(relPath, root) { + if (!relPath || typeof relPath !== 'string') return false; + if (bootstrapActive(root)) return false; + const norm = relPath.replace(/\/+$/, ''); + if (!norm) return false; + const prefix = norm + '/'; + for (const pattern of PROTECTED_PATTERNS) { + const literal = pattern + .replace(/\/\*\*\/?$/, '/') + .replace(/\/\*$/, '/') + .replace(/\*+/g, ''); + if (!literal) continue; + if (literal === norm || literal === prefix) return true; + if (literal.startsWith(prefix)) return true; + } + return false; +} + +// --------------------------------------------------------------------------- +// Glob +// --------------------------------------------------------------------------- + +function globToRegex(glob) { + let re = '^'; + let i = 0; + while (i < glob.length) { + const c = glob[i]; + if (c === '*') { + if (glob[i + 1] === '*') { + re += '.*'; + i += 2; + if (glob[i] === '/') i++; + } else { + re += '[^/]*'; + i++; + } + } else if (c === '?') { + re += '[^/]'; + i++; + } else if ('.+^$(){}|[]\\'.includes(c)) { + re += '\\' + c; + i++; + } else { + re += c; + i++; + } + } + re += '$'; + return new RegExp(re); +} + +function matchAnyPositive(patterns, relPath) { + if (!Array.isArray(patterns)) return null; + for (const p of patterns) { + if (typeof p !== 'string' || p.startsWith('!')) continue; + if (globToRegex(p).test(relPath)) return p; + } + return null; +} + +function matchAnyNegation(patterns, relPath) { + if (!Array.isArray(patterns)) return null; + for (const p of patterns) { + if (typeof p !== 'string' || !p.startsWith('!')) continue; + if (globToRegex(p.slice(1)).test(relPath)) return p; + } + return null; +} + +// --------------------------------------------------------------------------- +// Path + repo root +// --------------------------------------------------------------------------- + +export function resolveRepoRoot(startDir) { + if (process.env.AGENT_SCOPE_ROOT) return process.env.AGENT_SCOPE_ROOT; + let dir = startDir || process.cwd(); + for (let i = 0; i < 64; i++) { + if (existsSync(resolve(dir, 'agent-scope'))) return dir; + const parent = dirname(dir); + if (parent === dir) break; + dir = parent; + } + return startDir || process.cwd(); +} + +export function normalizeToRepoPath(root, p) { + if (!p) return ''; + const abs = isAbsolute(p) ? p : resolve(root, p); + let rel = relative(root, abs); + if (sep !== '/') rel = rel.split(sep).join('/'); + return rel; +} + +// --------------------------------------------------------------------------- +// Active task resolution +// --------------------------------------------------------------------------- + +function readFileOrNull(p) { + try { return readFileSync(p, 'utf8'); } catch { return null; } +} + +function safeGit(root, args) { + try { + return execFileSync('git', args, { + cwd: root, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'ignore'], + timeout: 3000, + }).trim(); + } catch { + return null; + } +} + +const BRANCH_TASK_RE = /^(?:task|agent-scope)\/([a-z0-9][a-z0-9-_.]{0,63})(?:\/|$)/; + +export function detectTaskFromBranch(root) { + const branch = safeGit(root, ['rev-parse', '--abbrev-ref', 'HEAD']); + if (!branch || branch === 'HEAD') return null; + const m = BRANCH_TASK_RE.exec(branch); + return m ? m[1] : null; +} + +export function detectTaskFromGitConfig(root) { + const v = safeGit(root, ['config', '--get', 'agent-scope.task']); + return v || null; +} + +export function resolveActiveTaskId(root, opts = {}) { + const fromEnv = process.env.AGENT_SCOPE_TASK; + if (fromEnv && fromEnv.trim()) return { id: fromEnv.trim(), source: 'env' }; + + const activeFile = resolve(root, 'agent-scope/active'); + const fromFile = readFileOrNull(activeFile); + if (fromFile && fromFile.trim()) return { id: fromFile.trim(), source: 'file' }; + + if (!opts.noBranch) { + const fromBranch = detectTaskFromBranch(root); + if (fromBranch) return { id: fromBranch, source: 'branch' }; + } + + if (!opts.noGitConfig) { + const fromCfg = detectTaskFromGitConfig(root); + if (fromCfg) return { id: fromCfg, source: 'git-config' }; + } + + return { id: null, source: 'none' }; +} + +export function getActiveTaskId(root) { + return resolveActiveTaskId(root).id; +} + +// --------------------------------------------------------------------------- +// Manifest loading + validation + inheritance +// --------------------------------------------------------------------------- + +export function listTasks(root) { + const dir = resolve(root, 'agent-scope/tasks'); + if (!existsSync(dir)) return []; + return readdirSync(dir) + .filter(f => f.endsWith('.json')) + .map(f => f.replace(/\.json$/, '')) + .sort(); +} + +const ALLOWED_KEYS = ['id','description','owner','created','allowed','exemptions','notes','dkg','inherits']; + +export function validateManifest(obj, expectedId) { + const errors = []; + if (!obj || typeof obj !== 'object' || Array.isArray(obj)) { + errors.push('manifest must be a JSON object'); + return errors; + } + if (typeof obj.id !== 'string' || !/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(obj.id)) { + errors.push('id must be a string matching /^[a-z0-9][a-z0-9-_.]{0,63}$/'); + } + if (expectedId && obj.id && obj.id !== expectedId) { + errors.push(`id '${obj.id}' does not match filename '${expectedId}'`); + } + const hasInherits = Array.isArray(obj.inherits) && obj.inherits.length > 0; + const hasAllowed = Array.isArray(obj.allowed); + const hasExemptions = Array.isArray(obj.exemptions) && obj.exemptions.length > 0; + if (!hasAllowed && !hasInherits && !hasExemptions) { + errors.push('at least one of allowed / inherits / exemptions must be provided'); + } + if (obj.allowed !== undefined) { + if (!Array.isArray(obj.allowed)) errors.push('allowed must be an array'); + else obj.allowed.forEach((p, i) => { + if (typeof p !== 'string' || !p.length) errors.push(`allowed[${i}] must be a non-empty string`); + }); + } + if (obj.exemptions !== undefined) { + if (!Array.isArray(obj.exemptions)) errors.push('exemptions must be an array'); + else obj.exemptions.forEach((p, i) => { + if (typeof p !== 'string' || !p.length) errors.push(`exemptions[${i}] must be a non-empty string`); + }); + } + if (obj.inherits !== undefined) { + if (!Array.isArray(obj.inherits)) errors.push('inherits must be an array of task ids'); + else obj.inherits.forEach((id, i) => { + if (typeof id !== 'string' || !/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(id)) { + errors.push(`inherits[${i}] must match /^[a-z0-9][a-z0-9-_.]{0,63}$/`); + } + }); + } + for (const k of Object.keys(obj)) { + if (!ALLOWED_KEYS.includes(k)) errors.push(`unknown property: ${k}`); + } + return errors; +} + +function loadAndResolve(root, id, seen = new Set(), chain = []) { + if (seen.has(id)) { + throw new Error(`inheritance cycle detected: ${[...chain, id].join(' -> ')}`); + } + seen.add(id); + + const manifestPath = resolve(root, 'agent-scope/tasks', `${id}.json`); + if (!existsSync(manifestPath)) { + throw new Error(`Task manifest not found: ${manifestPath}` + (chain.length ? ` (inherited from ${chain.join(' -> ')})` : '')); + } + const raw = readFileSync(manifestPath, 'utf8'); + let parsed; + try { parsed = JSON.parse(raw); } + catch (e) { throw new Error(`Task manifest is not valid JSON: ${manifestPath}: ${e.message}`); } + + const errors = validateManifest(parsed, id); + if (errors.length) { + throw new Error(`Invalid task manifest ${manifestPath}:\n - ${errors.join('\n - ')}`); + } + parsed.allowed = parsed.allowed || []; + parsed.exemptions = parsed.exemptions || []; + + const merged = { allowed: [], exemptions: [] }; + for (const parentId of parsed.inherits || []) { + const parent = loadAndResolve(root, parentId, new Set(seen), [...chain, id]); + merged.allowed.push(...parent.allowed); + merged.exemptions.push(...parent.exemptions); + } + merged.allowed.push(...parsed.allowed); + merged.exemptions.push(...parsed.exemptions); + + return { + ...parsed, + allowed: dedupe(merged.allowed), + exemptions: dedupe(merged.exemptions), + __path: manifestPath, + __inheritedFrom: parsed.inherits || [], + }; +} + +function dedupe(arr) { + const seen = new Set(); + const out = []; + for (const x of arr) { if (!seen.has(x)) { seen.add(x); out.push(x); } } + return out; +} + +export function loadTask(root, id) { + if (!id) return null; + return loadAndResolve(root, id); +} + +// --------------------------------------------------------------------------- +// Core decision +// --------------------------------------------------------------------------- + +export function checkPath(task, relPath, root) { + if (typeof relPath !== 'string' || relPath.length === 0) return 'deny'; + if (relPath.includes('..')) return 'deny'; + + if (checkProtected(relPath, root) === 'deny') return 'protected'; + + if (!task) return 'allow'; + + if (matchAnyNegation(task.allowed, relPath)) return 'deny'; + if (matchAnyNegation(task.exemptions, relPath)) return 'deny'; + if (matchAnyPositive(task.exemptions, relPath)) return 'exempt'; + if (matchAnyPositive(task.allowed, relPath)) return 'allow'; + return 'deny'; +} + +export function explainDeny(task, relPath, decision) { + if (decision === 'protected') { + return [ + `PROTECTED PATH — write blocked by system policy.`, + `Path: ${relPath}`, + ``, + `This path is part of the agent-scope enforcement system. Modifying it`, + `would weaken the very mechanism that keeps agent work in-scope, so`, + `writes are blocked regardless of the active task.`, + ``, + `If this change is legitimate (e.g. you're improving agent-scope itself),`, + `ask the user to enable bootstrap mode (touch agent-scope/.bootstrap-token`, + `in their own terminal, or set AGENT_SCOPE_BOOTSTRAP=1 in their env).`, + ``, + `Protected patterns:`, + ...PROTECTED_PATTERNS.map(p => ` - ${p}`), + ].join('\n'); + } + if (!task) return ''; + + const positives = (task.allowed || []).filter(p => !p.startsWith('!')); + const negatives = (task.allowed || []).filter(p => p.startsWith('!')) + .concat((task.exemptions || []).filter(p => p.startsWith('!'))); + const exemptions = (task.exemptions || []).filter(p => !p.startsWith('!')); + + const lines = [ + `OUT OF TASK SCOPE.`, + `Active task: ${task.id} — ${task.description || ''}`, + `Denied path: ${relPath}`, + ``, + `This task only permits writes to paths matching:`, + ...(positives.length ? positives.map(p => ` - ${p}`) : [' (nothing)']), + ]; + if (exemptions.length) { + lines.push('', 'Exempted patterns (always allowed):', ...exemptions.map(p => ` - ${p}`)); + } + if (negatives.length) { + lines.push('', 'Explicit deny patterns:', ...negatives.map(p => ` - ${p}`)); + } + lines.push( + '', + `If this change is needed for the current task, STOP and ask the user for`, + `explicit approval. The user can approve by adding the path (or a covering`, + `glob) to agent-scope/tasks/${task.id}.json under 'allowed' or 'exemptions',`, + `or by switching tasks.` + ); + return lines.join('\n'); +} + +// --------------------------------------------------------------------------- + +export function checkPathFromAnywhere(p, opts = {}) { + const root = opts.root || resolveRepoRoot(); + const { id } = opts.taskId ? { id: opts.taskId } : resolveActiveTaskId(root); + const task = id ? loadTask(root, id) : null; + const rel = normalizeToRepoPath(root, p); + return { root, taskId: id, task, relPath: rel, decision: checkPath(task, rel, root) }; +} + +export const __scopeLibFile = fileURLToPath(import.meta.url); diff --git a/agent-scope/lib/scope.test.mjs b/agent-scope/lib/scope.test.mjs new file mode 100644 index 000000000..1c9d94e10 --- /dev/null +++ b/agent-scope/lib/scope.test.mjs @@ -0,0 +1,427 @@ +// Unit tests for the scope-check library. Run with: +// node --test agent-scope/lib/scope.test.mjs + +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { + mkdtempSync, writeFileSync, mkdirSync, rmSync, statSync, existsSync, readFileSync, +} from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { + checkPath, + checkProtected, + coversProtected, + validateManifest, + normalizeToRepoPath, + loadTask, + resolveActiveTaskId, + listTasks, + explainDeny, + checkNodeVersion, + PROTECTED_PATTERNS, + isBootstrapActive, +} from './scope.mjs'; +import { logDenial, logDecision, MAX_BYTES } from './log.mjs'; + +function makeRepo() { + const root = mkdtempSync(join(tmpdir(), 'agent-scope-test-')); + mkdirSync(join(root, 'agent-scope/tasks'), { recursive: true }); + return root; +} + +function writeTask(root, id, body) { + writeFileSync(join(root, 'agent-scope/tasks', `${id}.json`), JSON.stringify(body, null, 2)); +} + +// --- core decision -------------------------------------------------------- + +test('checkPath: no task → allow for non-protected path', () => { + assert.equal(checkPath(null, 'any/file.ts'), 'allow'); +}); + +test('checkPath: basic allow', () => { + const t = { id: 't', allowed: ['src/**/*.ts'] }; + assert.equal(checkPath(t, 'src/foo/bar.ts'), 'allow'); +}); + +test('checkPath: deny when not matched', () => { + const t = { id: 't', allowed: ['src/**/*.ts'] }; + assert.equal(checkPath(t, 'lib/other.ts'), 'deny'); +}); + +test('checkPath: exemption', () => { + const t = { id: 't', allowed: ['src/**/*.ts'], exemptions: ['**/dist/**'] }; + assert.equal(checkPath(t, 'anything/dist/bundle.js'), 'exempt'); +}); + +test('checkPath: explicit deny (!) overrides allowed', () => { + const t = { id: 't', allowed: ['src/**', '!src/**/secrets.*'] }; + assert.equal(checkPath(t, 'src/config/secrets.ts'), 'deny'); + assert.equal(checkPath(t, 'src/config/public.ts'), 'allow'); +}); + +test('checkPath: explicit deny in exemptions overrides exemption', () => { + const t = { id: 't', allowed: ['src/**'], exemptions: ['**/dist/**', '!**/dist/secret.js'] }; + assert.equal(checkPath(t, 'foo/dist/secret.js'), 'deny'); + assert.equal(checkPath(t, 'foo/dist/bundle.js'), 'exempt'); +}); + +test('checkPath: path traversal denied', () => { + const t = { id: 't', allowed: ['**'] }; + assert.equal(checkPath(t, '../etc/passwd'), 'deny'); +}); + +// --- protected paths ------------------------------------------------------ + +test('checkProtected: matches a known protected path', () => { + const isolated = makeRepo(); // no bootstrap token + try { + assert.equal(checkProtected('.cursor/hooks.json', isolated), 'deny'); + assert.equal(checkProtected('.cursor/hooks/scope-guard.mjs', isolated), 'deny'); + assert.equal(checkProtected('agent-scope/lib/scope.mjs', isolated), 'deny'); + assert.equal(checkProtected('agent-scope/tasks/base.json', isolated), 'deny'); + assert.equal(checkProtected('agent-scope/active', isolated), 'deny'); + assert.equal(checkProtected('agent-scope/.bootstrap-token', isolated), 'deny'); + } finally { rmSync(isolated, { recursive: true, force: true }); } +}); + +test('checkProtected: normal paths pass through', () => { + const isolated = makeRepo(); + try { + assert.equal(checkProtected('packages/core/src/index.ts', isolated), 'allow'); + assert.equal(checkProtected('README.md', isolated), 'allow'); + } finally { rmSync(isolated, { recursive: true, force: true }); } +}); + +test('checkProtected: bootstrap env bypass', () => { + process.env.AGENT_SCOPE_BOOTSTRAP = '1'; + try { + assert.equal(checkProtected('.cursor/hooks.json'), 'allow'); + } finally { delete process.env.AGENT_SCOPE_BOOTSTRAP; } +}); + +test('checkProtected: token file bypass', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'agent-scope/.bootstrap-token'), ''); + assert.equal(isBootstrapActive(root), true); + assert.equal(checkProtected('agent-scope/lib/scope.mjs', root), 'allow'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('checkPath: protected even with active task that would allow it', () => { + const t = { id: 't', allowed: ['**'] }; + const isolated = makeRepo(); + try { + assert.equal(checkPath(t, '.cursor/hooks.json', isolated), 'protected'); + } finally { rmSync(isolated, { recursive: true, force: true }); } +}); + +test('coversProtected: directory that IS a protected tree root', () => { + const isolated = makeRepo(); + try { + assert.equal(coversProtected('.cursor/hooks', isolated), true); + assert.equal(coversProtected('.cursor/hooks/', isolated), true); + assert.equal(coversProtected('agent-scope/lib', isolated), true); + assert.equal(coversProtected('agent-scope/tasks', isolated), true); + } finally { rmSync(isolated, { recursive: true, force: true }); } +}); + +test('coversProtected: ancestor directory of a protected tree', () => { + const isolated = makeRepo(); + try { + assert.equal(coversProtected('.cursor', isolated), true); // contains hooks/, rules/, hooks.json + assert.equal(coversProtected('agent-scope', isolated), true); // contains lib, bin, ... + } finally { rmSync(isolated, { recursive: true, force: true }); } +}); + +test('coversProtected: unrelated directory', () => { + const isolated = makeRepo(); + try { + assert.equal(coversProtected('packages/agent', isolated), false); + assert.equal(coversProtected('README.md', isolated), false); + } finally { rmSync(isolated, { recursive: true, force: true }); } +}); + +test('coversProtected: bootstrap bypasses', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'agent-scope/.bootstrap-token'), ''); + assert.equal(coversProtected('.cursor', root), false); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('PROTECTED_PATTERNS: covers all system surfaces', () => { + // Sanity: make sure nothing is forgotten. + const required = [ + '.cursor/hooks/**', + '.cursor/hooks.json', + 'agent-scope/lib/**', + 'agent-scope/bin/**', + 'agent-scope/tasks/**', + 'agent-scope/active', + 'agent-scope/.bootstrap-token', + '.git/hooks/**', + ]; + for (const p of required) assert.ok(PROTECTED_PATTERNS.includes(p), `missing protection: ${p}`); +}); + +// --- glob ----------------------------------------------------------------- + +test('glob: ** crosses directory separators', () => { + const t = { id: 't', allowed: ['pkg/**/test.ts'] }; + assert.equal(checkPath(t, 'pkg/a/b/c/test.ts'), 'allow'); + assert.equal(checkPath(t, 'pkg/test.ts'), 'allow'); +}); + +test('glob: * does not cross /', () => { + const t = { id: 't', allowed: ['pkg/*/test.ts'] }; + assert.equal(checkPath(t, 'pkg/a/test.ts'), 'allow'); + assert.equal(checkPath(t, 'pkg/a/b/test.ts'), 'deny'); +}); + +test('glob: ? matches one char', () => { + const t = { id: 't', allowed: ['file?.ts'] }; + assert.equal(checkPath(t, 'file1.ts'), 'allow'); + assert.equal(checkPath(t, 'file12.ts'), 'deny'); + assert.equal(checkPath(t, 'file.ts'), 'deny'); +}); + +test('glob: literal dots', () => { + const t = { id: 't', allowed: ['foo.bar.ts'] }; + assert.equal(checkPath(t, 'foo.bar.ts'), 'allow'); + assert.equal(checkPath(t, 'fooxbarxts'), 'deny'); +}); + +// --- path normalization -------------------------------------------------- + +test('normalizeToRepoPath: absolute → relative', () => { + assert.equal(normalizeToRepoPath('/tmp/repo', '/tmp/repo/a/b.ts'), 'a/b.ts'); +}); + +test('normalizeToRepoPath: relative stays relative', () => { + assert.equal(normalizeToRepoPath('/tmp/repo', 'a/b.ts'), 'a/b.ts'); +}); + +// --- manifest validation -------------------------------------------------- + +test('validateManifest: rejects missing id', () => { + const errs = validateManifest({ allowed: ['**'] }); + assert.ok(errs.some(e => /id/.test(e))); +}); + +test('validateManifest: requires allowed OR inherits OR exemptions', () => { + const errs = validateManifest({ id: 'x' }); + assert.ok(errs.some(e => /allowed \/ inherits \/ exemptions/.test(e))); +}); + +test('validateManifest: inherits alone is ok', () => { + const errs = validateManifest({ id: 'x', inherits: ['base'] }); + assert.deepEqual(errs, []); +}); + +test('validateManifest: rejects bad id chars', () => { + const errs = validateManifest({ id: 'Bad Id!', allowed: ['**'] }); + assert.ok(errs.some(e => /id/.test(e))); +}); + +test('validateManifest: filename mismatch', () => { + const errs = validateManifest({ id: 'foo', allowed: ['**'] }, 'bar'); + assert.ok(errs.some(e => /filename/.test(e))); +}); + +test('validateManifest: rejects unknown fields', () => { + const errs = validateManifest({ id: 'x', allowed: ['**'], secret: 1 }); + assert.ok(errs.some(e => /unknown property/.test(e))); +}); + +test('validateManifest: rejects bad inherits', () => { + const errs = validateManifest({ id: 'x', allowed: ['**'], inherits: ['Bad Id!'] }); + assert.ok(errs.some(e => /inherits/.test(e))); +}); + +test('validateManifest: accepts full valid doc', () => { + const errs = validateManifest({ + id: 'sync', + description: 'refactor sync', + owner: 'bojan', + inherits: ['base'], + allowed: ['src/**/*.ts'], + exemptions: ['**/dist/**'], + notes: 'watch out for ...', + dkg: { taskUri: 'urn:task:1' }, + }); + assert.deepEqual(errs, []); +}); + +// --- manifest loading + inheritance -------------------------------------- + +test('loadTask: returns parsed manifest', () => { + const root = makeRepo(); + try { + writeTask(root, 'x', { id: 'x', allowed: ['**/*.ts'] }); + const t = loadTask(root, 'x'); + assert.equal(t.id, 'x'); + assert.deepEqual(t.allowed, ['**/*.ts']); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('loadTask: throws on corrupt JSON', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'agent-scope/tasks/x.json'), 'not json'); + assert.throws(() => loadTask(root, 'x'), /JSON/); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('loadTask: throws on schema violation', () => { + const root = makeRepo(); + try { + writeTask(root, 'x', { id: 'x' }); + assert.throws(() => loadTask(root, 'x'), /allowed/); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('loadTask: merges allowed + exemptions from inherits', () => { + const root = makeRepo(); + try { + writeTask(root, 'base', { id: 'base', allowed: [], exemptions: ['**/dist/**'] }); + writeTask(root, 'child', { + id: 'child', inherits: ['base'], allowed: ['src/**'], exemptions: ['pnpm-lock.yaml'] + }); + const t = loadTask(root, 'child'); + assert.deepEqual(t.allowed, ['src/**']); + assert.deepEqual(t.exemptions.sort(), ['**/dist/**', 'pnpm-lock.yaml'].sort()); + assert.deepEqual(t.__inheritedFrom, ['base']); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('loadTask: inheritance cycle detected', () => { + const root = makeRepo(); + try { + writeTask(root, 'a', { id: 'a', inherits: ['b'], allowed: ['x'] }); + writeTask(root, 'b', { id: 'b', inherits: ['a'], allowed: ['y'] }); + assert.throws(() => loadTask(root, 'a'), /cycle/); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('loadTask: child deny overrides parent allow', () => { + const root = makeRepo(); + try { + writeTask(root, 'parent', { id: 'parent', allowed: ['src/**'] }); + writeTask(root, 'child', { id: 'child', inherits: ['parent'], allowed: ['!src/secrets.ts'] }); + const t = loadTask(root, 'child'); + assert.equal(checkPath(t, 'src/foo.ts'), 'allow'); + assert.equal(checkPath(t, 'src/secrets.ts'), 'deny'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +// --- active task resolution ----------------------------------------------- + +test('resolveActiveTaskId: env beats file', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'agent-scope/active'), 'from-file\n'); + process.env.AGENT_SCOPE_TASK = 'from-env'; + const r = resolveActiveTaskId(root, { noBranch: true, noGitConfig: true }); + assert.equal(r.id, 'from-env'); + assert.equal(r.source, 'env'); + } finally { + delete process.env.AGENT_SCOPE_TASK; + rmSync(root, { recursive: true, force: true }); + } +}); + +test('resolveActiveTaskId: file when env missing', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'agent-scope/active'), 'from-file\n'); + delete process.env.AGENT_SCOPE_TASK; + const r = resolveActiveTaskId(root, { noBranch: true, noGitConfig: true }); + assert.equal(r.id, 'from-file'); + assert.equal(r.source, 'file'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('resolveActiveTaskId: none when nothing set', () => { + const root = makeRepo(); + try { + delete process.env.AGENT_SCOPE_TASK; + const r = resolveActiveTaskId(root, { noBranch: true, noGitConfig: true }); + assert.equal(r.id, null); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('listTasks: returns sorted ids', () => { + const root = makeRepo(); + try { + writeTask(root, 'beta', { id: 'beta', allowed: ['**'] }); + writeTask(root, 'alpha', { id: 'alpha', allowed: ['**'] }); + assert.deepEqual(listTasks(root), ['alpha', 'beta']); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +// --- messages ------------------------------------------------------------- + +test('explainDeny: contains task id, path, and allowed patterns', () => { + const t = { id: 'sync', description: 'sync work', allowed: ['pkg/**/sync*'] }; + const msg = explainDeny(t, 'pkg/other/x.ts', 'deny'); + assert.match(msg, /sync/); + assert.match(msg, /pkg\/other\/x\.ts/); + assert.match(msg, /pkg\/\*\*\/sync\*/); +}); + +test('explainDeny: protected path message mentions bootstrap', () => { + const msg = explainDeny(null, '.cursor/hooks.json', 'protected'); + assert.match(msg, /PROTECTED PATH/); + assert.match(msg, /bootstrap/i); +}); + +// --- node version --------------------------------------------------------- + +test('checkNodeVersion: passes for current Node', () => { + checkNodeVersion(16); +}); + +test('checkNodeVersion: throws for impossibly high version', () => { + assert.throws(() => checkNodeVersion(999)); +}); + +// --- logging rotation ----------------------------------------------------- + +test('log: rotates jsonl when file exceeds MAX_BYTES', () => { + const root = makeRepo(); + try { + const logsDir = join(root, 'agent-scope/logs'); + mkdirSync(logsDir, { recursive: true }); + const file = join(logsDir, 'denials.jsonl'); + // Pre-fill the log with ~MAX_BYTES of content so the next write triggers rotate. + writeFileSync(file, 'x'.repeat(MAX_BYTES + 1024)); + logDenial(root, { event: 'test', path: 'a/b.ts', task: 'x' }); + // After rotation, denials.jsonl should exist and be small again. + const after = statSync(file); + assert.ok(after.size < 1024, `expected rotated file to be small, got ${after.size}`); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('log: writes jsonl with timestamp + fields', () => { + const root = makeRepo(); + try { + logDenial(root, { event: 'test', path: 'a/b.ts' }); + const content = readFileSync(join(root, 'agent-scope/logs/denials.jsonl'), 'utf8'); + const rec = JSON.parse(content.trim()); + assert.ok(rec.ts); + assert.equal(rec.event, 'test'); + assert.equal(rec.path, 'a/b.ts'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('log: logDecision separate file', () => { + const root = makeRepo(); + try { + logDecision(root, { event: 'ok', path: 'a.ts' }); + assert.ok(existsSync(join(root, 'agent-scope/logs/decisions.jsonl'))); + assert.ok(!existsSync(join(root, 'agent-scope/logs/denials.jsonl'))); + } finally { rmSync(root, { recursive: true, force: true }); } +}); diff --git a/agent-scope/lib/shell-parse.mjs b/agent-scope/lib/shell-parse.mjs new file mode 100644 index 000000000..9667dc4ed --- /dev/null +++ b/agent-scope/lib/shell-parse.mjs @@ -0,0 +1,185 @@ +// Pure shell-command parser used by the beforeShellExecution hook. +// Extracted for unit-testability. No IO, no dependencies on scope.mjs. + +// Split on &&, ||, ;, | — treat each sub-command independently. +export function splitCommands(cmd) { + const parts = []; + let buf = ''; + let inSingle = false, inDouble = false; + for (let i = 0; i < cmd.length; i++) { + const c = cmd[i]; + if (c === "'" && !inDouble) inSingle = !inSingle; + else if (c === '"' && !inSingle) inDouble = !inDouble; + if (!inSingle && !inDouble) { + if ((c === '&' && cmd[i + 1] === '&') || (c === '|' && cmd[i + 1] === '|')) { + parts.push(buf); buf = ''; i++; continue; + } + if (c === ';' || c === '|' || c === '\n') { + parts.push(buf); buf = ''; continue; + } + } + buf += c; + } + if (buf.trim()) parts.push(buf); + return parts.map(s => s.trim()).filter(Boolean); +} + +// Tokenize a single sub-command into argv, stripping quotes. +export function tokenize(cmd) { + const out = []; + let buf = ''; + let inSingle = false, inDouble = false; + for (let i = 0; i < cmd.length; i++) { + const c = cmd[i]; + if (c === '\\' && !inSingle) { buf += cmd[++i] || ''; continue; } + if (c === "'" && !inDouble) { inSingle = !inSingle; continue; } + if (c === '"' && !inSingle) { inDouble = !inDouble; continue; } + if (!inSingle && !inDouble && /\s/.test(c)) { + if (buf) { out.push(buf); buf = ''; } + continue; + } + buf += c; + } + if (buf) out.push(buf); + return out; +} + +export function extractRedirections(tokens) { + const targets = []; + for (let i = 0; i < tokens.length; i++) { + const t = tokens[i]; + if (t === '>' || t === '>>' || t === '&>' || t === '>|') { + if (tokens[i + 1]) targets.push(tokens[i + 1]); + } else if (/^[0-9]*>>?$/.test(t)) { + if (tokens[i + 1]) targets.push(tokens[i + 1]); + } else if (/^([0-9]*>>?|&>)[^\s]+/.test(t)) { + targets.push(t.replace(/^([0-9]*>>?|&>)/, '')); + } else if (t === 'tee' || t === '/usr/bin/tee') { + for (let j = i + 1; j < tokens.length; j++) { + const a = tokens[j]; + if (a === '-a' || a === '--append' || a === '-i' || a === '--ignore-interrupts') continue; + if (a.startsWith('-')) continue; + targets.push(a); + break; + } + } + } + return targets; +} + +export function extractDestructiveTargets(tokens) { + if (!tokens.length) return { cmd: null, targets: [] }; + const head = tokens[0].split('/').pop(); + const DESTRUCTIVE = new Set(['rm', 'mv', 'cp', 'chmod', 'chown', 'truncate', 'install', 'ln', 'sed', 'unlink', 'rmdir']); + if (!DESTRUCTIVE.has(head)) return { cmd: null, targets: [] }; + + const targets = []; + const rest = tokens.slice(1); + if (head === 'sed') { + const inPlace = rest.some(t => t === '-i' || t.startsWith('-i') || t === '--in-place'); + if (!inPlace) return { cmd: head, targets: [] }; + } + + for (const t of rest) { + if (t.startsWith('-')) continue; + if (t.includes('=')) continue; + if (/^[0-9]+$/.test(t)) continue; + if (head === 'chmod' && /^[0-7]{3,4}$/.test(t)) continue; + if (head === 'chown' && !t.includes('/') && !t.startsWith('.')) { + if (targets.length === 0) continue; + } + targets.push(t); + } + return { cmd: head, targets }; +} + +export function extractFindTargets(tokens) { + if (!tokens.length || tokens[0].split('/').pop() !== 'find') return null; + const isDestructive = tokens.some((t, i) => + t === '-delete' || + (t === '-exec' && /^(rm|unlink|truncate|mv|sed|chmod|chown)$/.test((tokens[i + 1] || '').split('/').pop())) + ); + if (!isDestructive) return null; + const paths = []; + for (let i = 1; i < tokens.length; i++) { + const t = tokens[i]; + if (t.startsWith('-')) break; + paths.push(t); + } + return { cmd: 'find', targets: paths.length ? paths : ['.'] }; +} + +export function extractXargsTarget(tokens) { + if (!tokens.length || tokens[0].split('/').pop() !== 'xargs') return null; + for (let i = 1; i < tokens.length; i++) { + const t = tokens[i]; + if (t.startsWith('-')) continue; + const head = t.split('/').pop(); + if (/^(rm|unlink|truncate|mv|sed|chmod|chown|cp|install|ln)$/.test(head)) { + return { cmd: `xargs ${head}`, targets: [] }; + } + return null; + } + return null; +} + +const NESTED_SHELLS = new Set(['bash', 'sh', 'zsh', 'dash', 'ksh']); + +const OPAQUE_EVALUATORS = { + node: ['-e', '--eval', '-p', '--print'], + deno: ['eval'], + python: ['-c'], + python2: ['-c'], + python3: ['-c'], + perl: ['-e', '-E'], + ruby: ['-e'], + php: ['-r'], + lua: ['-e'], +}; + +export function extractNestedShellBody(tokens) { + const head = tokens[0] && tokens[0].split('/').pop(); + if (!head || !NESTED_SHELLS.has(head)) return null; + const dashC = tokens.indexOf('-c'); + if (dashC >= 1 && tokens[dashC + 1]) return { shell: head, body: tokens[dashC + 1] }; + return null; +} + +export function extractOpaqueBody(tokens) { + const head = tokens[0] && tokens[0].split('/').pop(); + if (!head) return null; + const flags = OPAQUE_EVALUATORS[head]; + if (!flags) return null; + for (let i = 1; i < tokens.length; i++) { + if (flags.includes(tokens[i]) && tokens[i + 1] != null) { + return { evaluator: head, flag: tokens[i], body: tokens[i + 1] }; + } + } + return null; +} + +const WRITE_HINTS = [ + /\bwriteFileSync\b/, /\bappendFileSync\b/, /\bunlinkSync\b/, /\brmSync\b/, + /\brmdirSync\b/, /\brenameSync\b/, /\bcpSync\b/, /\bcopyFileSync\b/, + /\bchmodSync\b/, /\bchownSync\b/, /\bsymlinkSync\b/, /\btruncateSync\b/, + /\bcreateWriteStream\b/, /\bmkdirSync\b/, + /\bos\.remove\b/, /\bos\.unlink\b/, /\bos\.rename\b/, /\bshutil\.\w+/, + /\bopen\s*\([^)]*,[^)]*['"](w|a|x)/, + /\bunlink\b/, /\brename\b/, /\brmdir\b/, + /\bFile::(open|write|unlink|rename)/, + /\bFile\.write\b/, /\bFile\.delete\b/, + />\s*[A-Za-z._/-]/, +]; + +export function bodyHasWriteIntent(body) { + return WRITE_HINTS.some(re => re.test(body)); +} + +export function literalsFromProtected(patterns) { + return patterns.map(p => p.replace(/\*\*?$/, '').replace(/\/\*\*$/, '/')); +} + +export function bodyTouchesProtected(body, protectedPatterns) { + const literals = literalsFromProtected(protectedPatterns); + return literals.some(lit => lit && body.includes(lit)); +} diff --git a/agent-scope/lib/shell-parse.test.mjs b/agent-scope/lib/shell-parse.test.mjs new file mode 100644 index 000000000..c5e5e43e1 --- /dev/null +++ b/agent-scope/lib/shell-parse.test.mjs @@ -0,0 +1,248 @@ +// Unit tests for the shell-command parser. Run with: +// node --test agent-scope/lib/shell-parse.test.mjs + +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { + splitCommands, tokenize, extractRedirections, extractDestructiveTargets, + extractFindTargets, extractXargsTarget, extractNestedShellBody, + extractOpaqueBody, bodyHasWriteIntent, bodyTouchesProtected, +} from './shell-parse.mjs'; +import { PROTECTED_PATTERNS } from './scope.mjs'; + +// --- splitCommands -------------------------------------------------------- + +test('splitCommands: semicolon', () => { + assert.deepEqual(splitCommands('a; b; c'), ['a', 'b', 'c']); +}); + +test('splitCommands: && / ||', () => { + assert.deepEqual(splitCommands('a && b || c'), ['a', 'b', 'c']); +}); + +test('splitCommands: pipe splits', () => { + assert.deepEqual(splitCommands('find . | xargs rm'), ['find .', 'xargs rm']); +}); + +test('splitCommands: respects quotes', () => { + assert.deepEqual(splitCommands('echo "a; b"; echo c'), ['echo "a; b"', 'echo c']); +}); + +// --- tokenize ------------------------------------------------------------- + +test('tokenize: basic', () => { + assert.deepEqual(tokenize('rm -rf foo bar'), ['rm', '-rf', 'foo', 'bar']); +}); + +test('tokenize: quoted arg preserved whole', () => { + assert.deepEqual(tokenize('bash -c "rm x"'), ['bash', '-c', 'rm x']); +}); + +test('tokenize: escaped spaces', () => { + assert.deepEqual(tokenize('rm a\\ b'), ['rm', 'a b']); +}); + +// --- redirections --------------------------------------------------------- + +test('extractRedirections: > target', () => { + assert.deepEqual(extractRedirections(tokenize('echo x > foo.txt')), ['foo.txt']); +}); + +test('extractRedirections: >> append', () => { + assert.deepEqual(extractRedirections(tokenize('echo x >> log.txt')), ['log.txt']); +}); + +test('extractRedirections: tee', () => { + assert.deepEqual(extractRedirections(tokenize('echo x | tee -a out.log')), ['out.log']); +}); + +test('extractRedirections: no redirect', () => { + assert.deepEqual(extractRedirections(tokenize('ls -la')), []); +}); + +// --- destructive targets -------------------------------------------------- + +test('extractDestructiveTargets: rm -rf', () => { + const r = extractDestructiveTargets(tokenize('rm -rf foo bar')); + assert.equal(r.cmd, 'rm'); + assert.deepEqual(r.targets, ['foo', 'bar']); +}); + +test('extractDestructiveTargets: unlink', () => { + const r = extractDestructiveTargets(tokenize('unlink foo')); + assert.equal(r.cmd, 'unlink'); + assert.deepEqual(r.targets, ['foo']); +}); + +test('extractDestructiveTargets: chmod numeric mode skipped', () => { + const r = extractDestructiveTargets(tokenize('chmod 755 script.sh')); + assert.equal(r.cmd, 'chmod'); + assert.deepEqual(r.targets, ['script.sh']); +}); + +test('extractDestructiveTargets: sed WITHOUT -i is not destructive', () => { + const r = extractDestructiveTargets(tokenize('sed s/a/b/ file.txt')); + assert.equal(r.cmd, 'sed'); + assert.deepEqual(r.targets, []); +}); + +test('extractDestructiveTargets: sed -i is destructive', () => { + const r = extractDestructiveTargets(tokenize('sed -i s/a/b/ file.txt')); + assert.equal(r.cmd, 'sed'); + assert.deepEqual(r.targets, ['s/a/b/', 'file.txt']); +}); + +test('extractDestructiveTargets: non-destructive command', () => { + const r = extractDestructiveTargets(tokenize('echo hello')); + assert.equal(r.cmd, null); +}); + +// --- find / xargs --------------------------------------------------------- + +test('extractFindTargets: -delete', () => { + const r = extractFindTargets(tokenize('find .cursor -name "*.mjs" -delete')); + assert.equal(r.cmd, 'find'); + assert.deepEqual(r.targets, ['.cursor']); +}); + +test('extractFindTargets: -exec rm', () => { + const r = extractFindTargets(tokenize('find agent-scope -name "*.json" -exec rm {} ;')); + assert.equal(r.cmd, 'find'); + assert.deepEqual(r.targets, ['agent-scope']); +}); + +test('extractFindTargets: no destructive expression → null', () => { + assert.equal(extractFindTargets(tokenize('find . -name "*.ts"')), null); +}); + +test('extractXargsTarget: xargs rm', () => { + const r = extractXargsTarget(tokenize('xargs rm')); + assert.equal(r.cmd, 'xargs rm'); +}); + +test('extractXargsTarget: xargs -0 unlink', () => { + const r = extractXargsTarget(tokenize('xargs -0 unlink')); + assert.equal(r.cmd, 'xargs unlink'); +}); + +test('extractXargsTarget: xargs echo (not destructive)', () => { + assert.equal(extractXargsTarget(tokenize('xargs echo')), null); +}); + +// --- nested shell / opaque evaluators ------------------------------------- + +test('extractNestedShellBody: bash -c', () => { + const r = extractNestedShellBody(tokenize('bash -c "rm -rf foo"')); + assert.equal(r.shell, 'bash'); + assert.equal(r.body, 'rm -rf foo'); +}); + +test('extractNestedShellBody: sh -c with absolute path', () => { + const r = extractNestedShellBody(tokenize('/bin/sh -c "echo x > y"')); + assert.equal(r.shell, 'sh'); + assert.equal(r.body, 'echo x > y'); +}); + +test('extractNestedShellBody: not a shell → null', () => { + assert.equal(extractNestedShellBody(tokenize('echo hi')), null); +}); + +test('extractOpaqueBody: node -e', () => { + const r = extractOpaqueBody(tokenize("node -e \"require('fs').unlinkSync('x')\"")); + assert.equal(r.evaluator, 'node'); + assert.equal(r.flag, '-e'); + assert.match(r.body, /unlinkSync/); +}); + +test('extractOpaqueBody: python3 -c', () => { + const r = extractOpaqueBody(tokenize('python3 -c "import os; os.remove(\'x\')"')); + assert.equal(r.evaluator, 'python3'); + assert.match(r.body, /os\.remove/); +}); + +test('extractOpaqueBody: perl -e', () => { + const r = extractOpaqueBody(tokenize("perl -e \"unlink 'x'\"")); + assert.equal(r.evaluator, 'perl'); +}); + +test('extractOpaqueBody: plain node (no -e)', () => { + assert.equal(extractOpaqueBody(tokenize('node script.js')), null); +}); + +// --- body intent / protected-path scanning -------------------------------- + +test('bodyHasWriteIntent: fs.writeFileSync', () => { + assert.ok(bodyHasWriteIntent("require('fs').writeFileSync('x', 'y')")); +}); + +test('bodyHasWriteIntent: python os.remove', () => { + assert.ok(bodyHasWriteIntent('os.remove("x")')); +}); + +test("bodyHasWriteIntent: python open('w')", () => { + assert.ok(bodyHasWriteIntent('open("foo.txt", "w").write("x")')); +}); + +test('bodyHasWriteIntent: shell-style redirect in body', () => { + assert.ok(bodyHasWriteIntent('echo x > y.txt')); +}); + +test('bodyHasWriteIntent: read-only code', () => { + assert.equal(bodyHasWriteIntent("console.log('hi')"), false); +}); + +test('bodyTouchesProtected: .cursor/hooks.json', () => { + assert.ok(bodyTouchesProtected("fs.writeFileSync('.cursor/hooks.json', '')", PROTECTED_PATTERNS)); +}); + +test('bodyTouchesProtected: agent-scope/lib/scope.mjs', () => { + assert.ok(bodyTouchesProtected("open('agent-scope/lib/scope.mjs', 'w')", PROTECTED_PATTERNS)); +}); + +test('bodyTouchesProtected: agent-scope/active', () => { + assert.ok(bodyTouchesProtected("fs.writeFileSync('agent-scope/active', 'evil')", PROTECTED_PATTERNS)); +}); + +test('bodyTouchesProtected: normal path does not match', () => { + assert.equal(bodyTouchesProtected("fs.writeFileSync('README.md', '')", PROTECTED_PATTERNS), false); +}); + +// --- composite scenarios (the gap we're closing) -------------------------- + +test('scenario: node -e + fs.writeFileSync + protected path is flagged', () => { + const cmd = "node -e \"require('fs').writeFileSync('agent-scope/active', 'evil')\""; + const tokens = tokenize(cmd); + const opaque = extractOpaqueBody(tokens); + assert.ok(opaque); + assert.ok(bodyHasWriteIntent(opaque.body)); + assert.ok(bodyTouchesProtected(opaque.body, PROTECTED_PATTERNS)); +}); + +test('scenario: python3 -c + open(w) + .cursor/hooks/ is flagged', () => { + const cmd = 'python3 -c "open(\'.cursor/hooks/evil.py\', \'w\').write(\'x\')"'; + const tokens = tokenize(cmd); + const opaque = extractOpaqueBody(tokens); + assert.ok(opaque); + assert.ok(bodyHasWriteIntent(opaque.body)); + assert.ok(bodyTouchesProtected(opaque.body, PROTECTED_PATTERNS)); +}); + +test('scenario: bash -c "rm -rf .cursor/hooks" produces destructive target on recursion', () => { + const cmd = 'bash -c "rm -rf .cursor/hooks"'; + const outer = tokenize(cmd); + const nested = extractNestedShellBody(outer); + assert.ok(nested); + const inner = tokenize(nested.body); + const dest = extractDestructiveTargets(inner); + assert.equal(dest.cmd, 'rm'); + assert.deepEqual(dest.targets, ['.cursor/hooks']); +}); + +test('scenario: benign node command (read-only) is not flagged', () => { + const cmd = "node -e \"console.log(require('fs').readFileSync('.cursor/hooks.json', 'utf8'))\""; + const tokens = tokenize(cmd); + const opaque = extractOpaqueBody(tokens); + assert.ok(opaque); + // Body references protected path but has no write intent → not flagged. + assert.equal(bodyHasWriteIntent(opaque.body), false); +}); diff --git a/agent-scope/schema/task.schema.json b/agent-scope/schema/task.schema.json new file mode 100644 index 000000000..75d83c757 --- /dev/null +++ b/agent-scope/schema/task.schema.json @@ -0,0 +1,48 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://dkg.io/agent-scope/task.schema.json", + "title": "agent-scope Task Manifest", + "description": "Declares what files a task may modify. See agent-scope/README.md.", + "type": "object", + "required": ["id"], + "additionalProperties": false, + "anyOf": [ + { "required": ["allowed"] }, + { "required": ["inherits"] }, + { "required": ["exemptions"] } + ], + "properties": { + "id": { + "type": "string", + "pattern": "^[a-z0-9][a-z0-9-_.]{0,63}$", + "description": "Task identifier. Must match the filename (without .json)." + }, + "description": { "type": "string" }, + "owner": { "type": "string" }, + "created": { "type": "string", "format": "date-time" }, + "inherits": { + "type": "array", + "items": { "type": "string", "pattern": "^[a-z0-9][a-z0-9-_.]{0,63}$" }, + "description": "Task ids to inherit `allowed` and `exemptions` from (merged, parents first)." + }, + "allowed": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "description": "Glob patterns this task may write to. Supports *, **, ?. Prefix with ! to negate." + }, + "exemptions": { + "type": "array", + "items": { "type": "string", "minLength": 1 }, + "description": "Patterns always allowed (build artifacts, lockfiles, generated files)." + }, + "notes": { "type": "string" }, + "dkg": { + "type": "object", + "additionalProperties": false, + "properties": { + "taskUri": { "type": "string" }, + "sessionUri": { "type": "string" } + } + } + } +} diff --git a/agent-scope/tasks/base.json b/agent-scope/tasks/base.json new file mode 100644 index 000000000..21eb5972e --- /dev/null +++ b/agent-scope/tasks/base.json @@ -0,0 +1,20 @@ +{ + "id": "base", + "description": "Shared exemptions for most tasks — extend via `inherits: [\"base\"]`.", + "owner": "agent-scope", + "allowed": [], + "exemptions": [ + "**/dist/**", + "**/build/**", + "**/*.tsbuildinfo", + "**/node_modules/**", + "pnpm-lock.yaml", + "package-lock.json", + "yarn.lock", + "**/coverage/**", + "**/.turbo/**", + "**/.next/**", + "**/.vite/**" + ], + "notes": "This base task has no `allowed` patterns of its own. Child tasks must provide their own `allowed`. The base only contributes build-artifact / lockfile exemptions that apply to most tasks." +} diff --git a/agent-scope/tasks/staking.json b/agent-scope/tasks/staking.json new file mode 100644 index 000000000..8d22bb07a --- /dev/null +++ b/agent-scope/tasks/staking.json @@ -0,0 +1,20 @@ +{ + "id": "staking", + "description": "Example: work on the staking contracts and chain adapter bindings", + "owner": "unassigned", + "inherits": ["base"], + "allowed": [ + "packages/evm-module/contracts/Staking.sol", + "packages/evm-module/contracts/StakingKPI.sol", + "packages/evm-module/contracts/storage/StakingStorage.sol", + "packages/evm-module/contracts/storage/DelegatorsInfo.sol", + "packages/evm-module/contracts/storage/ConvictionStakingStorage.sol", + "packages/evm-module/test/**staking*", + "packages/evm-module/deploy/**staking*", + "packages/chain/src/**staking*" + ], + "exemptions": [ + "**/artifacts/**", + "**/cache/**" + ] +} diff --git a/agent-scope/tasks/sync-refactor.json b/agent-scope/tasks/sync-refactor.json new file mode 100644 index 000000000..a3840fcc6 --- /dev/null +++ b/agent-scope/tasks/sync-refactor.json @@ -0,0 +1,12 @@ +{ + "id": "sync-refactor", + "description": "Example: refactor the peer sync protocol (/dkg/sync/1.0.0)", + "owner": "unassigned", + "inherits": ["base"], + "allowed": [ + "packages/agent/src/**sync*", + "packages/core/src/**sync*", + "packages/publisher/src/**sync*", + "packages/*/test/**sync*" + ] +} diff --git a/package.json b/package.json index 1e08a3333..cbd31c058 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,14 @@ "test:e2e:ui": "pnpm --filter @origintrail-official/dkg-node-ui test:e2e", "test:game:ui": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:ui", "test:game:e2e": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:e2e", - "test:all": "pnpm test && pnpm test:evm && pnpm test:game:ui" + "test:all": "pnpm test && pnpm test:evm && pnpm test:game:ui", + "pretest": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs && node agent-scope/bin/task.mjs validate", + "task": "node agent-scope/bin/task.mjs", + "scope-check": "node agent-scope/bin/scope-check.mjs", + "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs", + "scope:install-hooks": "bash agent-scope/bin/install-git-hooks.sh", + "scope:validate": "node agent-scope/bin/task.mjs validate", + "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show" }, "devDependencies": { "@types/node": "^22", From 0a2e45b8c075d23b9d4a6d470fd2f8636850a75a Mon Sep 17 00:00:00 2001 From: Bojan Date: Tue, 21 Apr 2026 16:00:40 +0200 Subject: [PATCH 02/21] remove commit blocker and ci --- .cursor/rules/agent-scope.mdc | 11 ++- CLAUDE.md | 5 ++ agent-scope/README.md | 111 +++++++++++++-------------- agent-scope/bin/install-git-hooks.sh | 53 ------------- agent-scope/bin/scope-check.mjs | 64 --------------- agent-scope/hooks/pre-commit | 52 ------------- agent-scope/lib/scope.mjs | 3 - agent-scope/lib/scope.test.mjs | 7 +- package.json | 3 - 9 files changed, 69 insertions(+), 240 deletions(-) delete mode 100755 agent-scope/bin/install-git-hooks.sh delete mode 100755 agent-scope/bin/scope-check.mjs delete mode 100755 agent-scope/hooks/pre-commit diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index ae24ef558..4375dc0f2 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -200,16 +200,19 @@ Always denied regardless of task, unless a human has enabled bootstrap (`touch agent-scope/.bootstrap-token` or `AGENT_SCOPE_BOOTSTRAP=1`): - `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` -- `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/hooks/**` -- `agent-scope/schema/**`, `agent-scope/tasks/**`, `agent-scope/active`, - `agent-scope/.bootstrap-token` -- `.git/hooks/**`, `.github/workflows/agent-scope.yml` +- `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/schema/**` +- `agent-scope/tasks/**`, `agent-scope/active`, `agent-scope/.bootstrap-token` If one of these needs to change, use the `bootstrap` option from the denial menu — do not try to bypass (no shell redirection, no `node -e`, no alternate tooling). The `afterShell` hook will delete any untracked files in these paths even if the bypass succeeded, so retry attempts are wasted. +Note: the guard operates **only on agent actions**. Humans committing or +pushing manually through their terminal/IDE are not restricted — there are no +git hooks and no CI enforcement. If a human edits a protected file by hand, +they can commit and push normally. + ## Manifest cheat sheet ```json diff --git a/CLAUDE.md b/CLAUDE.md index bd9a8cbec..f0de9eaad 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -295,3 +295,8 @@ Manifest format is in `agent-scope/README.md`. Never edit a protected path (`.cursor/hooks/**`, `agent-scope/lib/**`, etc.) without user-granted bootstrap. Never improvise around a denial. +The guard restricts **agent** actions only. Humans committing, pushing, or +editing through their own terminal are not restricted — there are no git +hooks and no CI enforcement layer. That distinction matters if a user edits +a protected file by hand: they can commit and push normally. + diff --git a/agent-scope/README.md b/agent-scope/README.md index 57ae93fac..64fef7129 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -3,9 +3,14 @@ Task-scoped write permissions for AI coding agents. An agent can **read** the whole repo, but can only **write** files that are -listed in the active task's manifest. Attempts to write out-of-scope files are -blocked by Cursor hooks, git pre-commit, and CI — and must be explicitly -approved by a human (by editing the manifest). +listed in the active task's manifest. Attempts to write out-of-scope files +are blocked by a stack of Cursor hooks and must be explicitly approved by a +human (by editing the manifest). + +The guard restricts **agent** actions only. Humans committing, pushing, or +editing through their own terminal are never restricted — there are no git +hooks and no CI enforcement. If you edit a protected file by hand, you can +commit and push normally. ## Opt-in by default @@ -35,16 +40,16 @@ default. Agent → Cursor sessionStart hook → injects active-task context (silent when idle) Agent → Cursor preToolUse hook → blocks out-of-scope Write/Edit/Delete Agent → Cursor beforeShellExecution → blocks destructive shell cmds on denied paths -Agent → Cursor afterShellExecution → reverts out-of-scope shell writes -System → hardcoded protected paths → always blocks writes to agent-scope itself -Dev → git pre-commit hook → blocks local commits of out-of-scope files -CI → GitHub Actions → blocks PRs with out-of-scope diffs +Agent → Cursor afterShellExecution → reverts out-of-scope shell writes, deletes untracked files in denied paths +System → hardcoded protected paths → always blocks agent writes to agent-scope itself Ops → optional webhook sink → forwards denials to DKG/Slack/etc. ``` -All layers use the same library (`agent-scope/lib/scope.mjs`) and the same -manifests (`agent-scope/tasks/*.json`). No layer is optional — bypassing one -(e.g. Cursor's hook) still leaves the commit, PR, and review layers. +All four agent-facing layers use the same library +(`agent-scope/lib/scope.mjs`) and the same manifests +(`agent-scope/tasks/*.json`). The pre-shell and after-shell layers back each +other up, so destructive commands that slip past the pre-check get reverted +or deleted afterwards. ## Concepts @@ -63,11 +68,11 @@ manifests (`agent-scope/tasks/*.json`). No layer is optional — bypassing one ## One-time setup -```bash -# Install the git pre-commit hook (per developer) -pnpm scope:install-hooks +There is no setup. The Cursor hooks are configured via `.cursor/hooks.json` +and activate automatically in any Cursor session opened on this repo. Sanity +checks: -# Verify everything is in order +```bash pnpm scope:test # runs the scope library unit tests pnpm scope:validate # validates every manifest ``` @@ -173,18 +178,18 @@ Run `pnpm scope:validate` to verify all manifests conform to ## How enforcement works -Six layers: - -1. **Cursor `sessionStart` hook** (`.cursor/hooks/session-start.mjs`) injects - the active task's allowed patterns into the agent's context so it knows - what it may modify from the first turn. **When no task is active and - bootstrap is off, the hook emits nothing** — the agent's initial context - is untouched. Only when a task is active (or bootstrap is on) does it - surface a context block. -2. **Cursor `preToolUse` hook** (`.cursor/hooks/scope-guard.mjs`) runs before - every `Write`, `StrReplace`, `Delete`, `EditNotebook`, `MultiEdit`, and - `Edit`. Runs the protected-path check first, then the task-scope check. -3. **Cursor `beforeShellExecution` hook** (`.cursor/hooks/shell-precheck.mjs`) +Four agent-facing layers, all running inside Cursor: + +1. **`sessionStart` hook** (`.cursor/hooks/session-start.mjs`) injects the + active task's allowed patterns into the agent's context so it knows what + it may modify from the first turn. **When no task is active and bootstrap + is off, the hook emits nothing** — the agent's initial context is + untouched. Only when a task is active (or bootstrap is on) does it surface + a context block. +2. **`preToolUse` hook** (`.cursor/hooks/scope-guard.mjs`) runs before every + `Write`, `StrReplace`, `Delete`, `EditNotebook`, `MultiEdit`, and `Edit`. + It runs the protected-path check first, then the task-scope check. +3. **`beforeShellExecution` hook** (`.cursor/hooks/shell-precheck.mjs`) tokenises the pending shell command and blocks destructive verbs (`rm`, `mv`, `cp`, `chmod`, `chown`, `truncate`, `ln -sf`, `sed -i`, redirections `>` / `>>` / `tee`, `find -delete`, `xargs rm`) when their @@ -193,26 +198,21 @@ Six layers: catch bypass attempts that hide destructive operations inside string arguments. Parsing logic lives in `agent-scope/lib/shell-parse.mjs` and is fully unit-tested. -4. **Cursor `afterShellExecution` hook** (`.cursor/hooks/shell-diff-check.mjs`) - is the backstop for anything the pre-check misses: it runs - `git status --porcelain` and `git checkout --` reverts any tracked - out-of-scope/protected modifications. Untracked files in denied paths - are **deleted** (so an agent cannot establish persistent state like a - new hook file via a pre-shell bypass). -5. **Git pre-commit hook** (`agent-scope/hooks/pre-commit`, installed via - `pnpm scope:install-hooks`) blocks local commits of out-of-scope files. - Also hard-refuses to commit `agent-scope/.bootstrap-token`. -6. **GitHub Actions** (`.github/workflows/agent-scope.yml`) runs on every - PR: validates all manifests, runs the unit tests, blocks the bootstrap - token from being committed, resolves the task id from the PR body or - branch name, and fails the check (with a PR comment) if any changed file - is out of scope. +4. **`afterShellExecution` hook** (`.cursor/hooks/shell-diff-check.mjs`) is + the backstop for anything the pre-check misses: it runs + `git status --porcelain`, `git checkout --` reverts any tracked + out-of-scope/protected modifications, and **deletes** untracked files in + denied paths (so an agent cannot establish persistent state like a new + hook file via a pre-shell bypass). If no active task is set (no env, no file, no matching branch, no git-config) **and** bootstrap is off, layer 1 is silent and layers 2–4 only trigger on -the hardcoded protected paths. Layers 5–6 likewise only gate protected-path -commits/PRs. Everything else is a no-op — you can do ad-hoc work without -changing the workflow. +the hardcoded protected paths. Everything else is a no-op — you can do +ad-hoc work without changing the workflow. + +No layer restricts **humans**. You can `git commit`, `git push`, and edit +anything manually through your terminal or IDE without interacting with the +guard — it only sees what the agent does. ## Hardcoded protected paths @@ -221,10 +221,12 @@ edit them, the whole thing would be worthless. These paths are **always denied** regardless of active task, unless bootstrap mode is active: - `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` -- `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/hooks/**` -- `agent-scope/schema/**`, `agent-scope/tasks/**`, `agent-scope/active` -- `agent-scope/.bootstrap-token` -- `.git/hooks/**`, `.github/workflows/agent-scope.yml` +- `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/schema/**` +- `agent-scope/tasks/**`, `agent-scope/active`, + `agent-scope/.bootstrap-token` + +(This list applies to **agent** writes only. A human editing any of these +files through their own terminal/IDE is not restricted.) ### Bootstrap mode @@ -246,8 +248,9 @@ the agent context. When you're done, remove it: rm agent-scope/.bootstrap-token ``` -The bootstrap token is in `.gitignore` and the pre-commit hook hard-refuses -to commit it, so it cannot leak into git history. +The bootstrap token is in `.gitignore`, so it cannot accidentally leak into +a commit even if you `git add .`. If you ever do `git add -f` it, remove it +before pushing. ## Manifest inheritance @@ -346,16 +349,6 @@ agent-scope/lib/denial.test.mjs # 33 unit tests No special tokens or APIs — the manifest is the source of truth; edit it to grant permission. Changes to a manifest still go through normal review. -## Emergency override - -For genuine emergencies, the git pre-commit hook can be skipped with: - -```bash -AGENT_SCOPE_SKIP=1 git commit ... -``` - -The CI check cannot be bypassed from the dev machine — it runs on GitHub. - ## Debug / audit ```bash diff --git a/agent-scope/bin/install-git-hooks.sh b/agent-scope/bin/install-git-hooks.sh deleted file mode 100755 index 122cf3d37..000000000 --- a/agent-scope/bin/install-git-hooks.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env bash -# Install agent-scope git hooks into .git/hooks/. -# -# Idempotent: if a hook with a different name already exists, we preserve it -# and only chain the agent-scope checks on top. - -set -euo pipefail - -repo_root=$(git rev-parse --show-toplevel) -cd "$repo_root" - -hooks_src="$repo_root/agent-scope/hooks" -hooks_dst="$repo_root/.git/hooks" - -if [[ ! -d "$hooks_dst" ]]; then - echo "error: $hooks_dst not found (is this a git repo?)" >&2 - exit 1 -fi - -install_hook() { - local name="$1" - local src="$hooks_src/$name" - local dst="$hooks_dst/$name" - - if [[ ! -f "$src" ]]; then - echo "skip: $name (no source)" >&2 - return - fi - - if [[ -f "$dst" ]] && ! grep -q 'agent-scope' "$dst" 2>/dev/null; then - # Existing non-agent-scope hook — back it up and chain. - cp "$dst" "$dst.pre-agent-scope.bak" - cat > "$dst" <] [--stdin] [ ...]'); - process.exit(0); - } - paths.push(a); -} - -if (stdinMode) { - const data = readFileSync(0, 'utf8'); - for (const line of data.split(/\r?\n/)) { - const p = line.trim(); - if (p) paths.push(p); - } -} - -if (paths.length === 0) { - console.error('scope-check: no paths provided'); - process.exit(2); -} - -const root = resolveRepoRoot(); -const taskId = taskOverride || getActiveTaskId(root); -const task = taskId ? loadTask(root, taskId) : null; - -let anyBad = false; -for (const p of paths) { - const rel = normalizeToRepoPath(root, p); - const decision = checkPath(task, rel, root); - console.log(`${decision.padEnd(9)} ${rel}`); - if (decision === 'deny' || decision === 'protected') anyBad = true; -} - -if (!task) { - console.error('(no active task — only protected paths enforced)'); -} - -process.exit(anyBad ? 1 : 0); diff --git a/agent-scope/hooks/pre-commit b/agent-scope/hooks/pre-commit deleted file mode 100755 index fb8f4b3bf..000000000 --- a/agent-scope/hooks/pre-commit +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env bash -# agent-scope pre-commit hook. Blocks commits that touch out-of-scope files. -# -# Install with: -# bash agent-scope/bin/install-git-hooks.sh -# Skip once (emergencies only): -# AGENT_SCOPE_SKIP=1 git commit ... - -set -euo pipefail - -if [[ "${AGENT_SCOPE_SKIP:-}" == "1" ]]; then - echo "agent-scope: pre-commit skipped (AGENT_SCOPE_SKIP=1)" >&2 - exit 0 -fi - -# Discover repo root robustly (hook may be called from any cwd). -repo_root=$(git rev-parse --show-toplevel) -cd "$repo_root" - -# Collect staged files (added/copied/modified/renamed). Zero-delimited for safety. -mapfile -d '' -t staged < <(git diff --cached --name-only --diff-filter=ACMR -z 2>/dev/null || true) - -if [[ ${#staged[@]} -eq 0 ]]; then - exit 0 -fi - -# Hardcoded refusal: never let the bootstrap token slip into a commit. -for p in "${staged[@]}"; do - if [[ "$p" == "agent-scope/.bootstrap-token" ]]; then - echo "agent-scope: REFUSING to commit agent-scope/.bootstrap-token." >&2 - echo " This file disables hardcoded path protection. Unstage it:" >&2 - echo " git restore --staged agent-scope/.bootstrap-token" >&2 - exit 1 - fi -done - -# Feed staged paths into scope-check via stdin. -# Exit codes: 0 = all allowed/exempt, 1 = at least one denied, 2 = usage error. -if printf '%s\n' "${staged[@]}" | node agent-scope/bin/scope-check.mjs --stdin; then - exit 0 -else - rc=$? - echo "" >&2 - echo "agent-scope: pre-commit blocked $rc-coded check — out-of-task files are staged." >&2 - echo "Options:" >&2 - echo " 1. Unstage them: git restore --staged " >&2 - echo " 2. Add the paths to the active task:" >&2 - echo " \$EDITOR agent-scope/tasks/\$(node agent-scope/bin/task.mjs show | head -1 | awk '{print \$NF}').json" >&2 - echo " 3. Commit under a different task: node agent-scope/bin/task.mjs set " >&2 - echo " 4. Emergency override (leaves an audit trail): AGENT_SCOPE_SKIP=1 git commit ..." >&2 - exit 1 -fi diff --git a/agent-scope/lib/scope.mjs b/agent-scope/lib/scope.mjs index e4f78ef72..cb9c94970 100644 --- a/agent-scope/lib/scope.mjs +++ b/agent-scope/lib/scope.mjs @@ -39,13 +39,10 @@ export const PROTECTED_PATTERNS = [ '.cursor/rules/agent-scope.mdc', 'agent-scope/lib/**', 'agent-scope/bin/**', - 'agent-scope/hooks/**', 'agent-scope/schema/**', 'agent-scope/tasks/**', 'agent-scope/active', 'agent-scope/.bootstrap-token', - '.git/hooks/**', - '.github/workflows/agent-scope.yml', ]; function bootstrapActive(root) { diff --git a/agent-scope/lib/scope.test.mjs b/agent-scope/lib/scope.test.mjs index 1c9d94e10..9e3660455 100644 --- a/agent-scope/lib/scope.test.mjs +++ b/agent-scope/lib/scope.test.mjs @@ -153,16 +153,19 @@ test('coversProtected: bootstrap bypasses', () => { }); test('PROTECTED_PATTERNS: covers all system surfaces', () => { - // Sanity: make sure nothing is forgotten. + // Sanity: make sure nothing is forgotten. The guard only protects its own + // live surfaces (Cursor hooks + rule + scope library + bin CLI + task + // manifests + active-task pointer + the bootstrap token itself). const required = [ '.cursor/hooks/**', '.cursor/hooks.json', + '.cursor/rules/agent-scope.mdc', 'agent-scope/lib/**', 'agent-scope/bin/**', + 'agent-scope/schema/**', 'agent-scope/tasks/**', 'agent-scope/active', 'agent-scope/.bootstrap-token', - '.git/hooks/**', ]; for (const p of required) assert.ok(PROTECTED_PATTERNS.includes(p), `missing protection: ${p}`); }); diff --git a/package.json b/package.json index cbd31c058..96959e01a 100644 --- a/package.json +++ b/package.json @@ -28,11 +28,8 @@ "test:game:ui": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:ui", "test:game:e2e": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:e2e", "test:all": "pnpm test && pnpm test:evm && pnpm test:game:ui", - "pretest": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs && node agent-scope/bin/task.mjs validate", "task": "node agent-scope/bin/task.mjs", - "scope-check": "node agent-scope/bin/scope-check.mjs", "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs", - "scope:install-hooks": "bash agent-scope/bin/install-git-hooks.sh", "scope:validate": "node agent-scope/bin/task.mjs validate", "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show" }, From 1db931673e0c730b7216e92c305f7f7d48f0dd9f Mon Sep 17 00:00:00 2001 From: Bojan Date: Tue, 21 Apr 2026 16:08:49 +0200 Subject: [PATCH 03/21] update --- .cursor/rules/agent-scope.mdc | 8 +++- CLAUDE.md | 9 +++- agent-scope/lib/denial.mjs | 77 ++++++++++++++++++++++++++++----- agent-scope/lib/denial.test.mjs | 46 +++++++++++++++++--- 4 files changed, 122 insertions(+), 18 deletions(-) diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index 4375dc0f2..9e25b662b 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -158,6 +158,11 @@ The JSON has this shape (TypeScript for clarity): 2. **Extract the JSON** from between `agent-scope-menu:begin` and `:end`. 3. **Call `AskQuestion`** with a single question whose prompt **must include**: - The denied path (or command). + - **Why it's restricted**: for `reason: "protected"` denials, read the + prose `Why this file is guarded` block and summarise it for the user + (use `protectedRole` / `protectedKind` from the structured JSON). For + `reason: "out-of-scope"` denials, state that the active task's manifest + does not list this path. - **Your reasoning** in 1–2 sentences: why you wanted to touch this file, what you were trying to accomplish. This is the plan-mode "here's what I was thinking" that the user needs to make an informed decision. @@ -165,7 +170,8 @@ The JSON has this shape (TypeScript for clarity): and why. Lead with the JSON's `recommendedOptionId` unless you have a concrete reason to override it. - The options array, verbatim from the JSON — use each entry's `id` as the - option id and `label` as the display label. + option id and `label` as the display label. For protected denials the + labels are already phrased as Yes / No / No-but-continue / Custom. 4. **Act on the user's choice** using the matching `action` object: - `add_to_manifest` → edit `agent-scope/tasks/.json`, append the listed patterns to `allowed`, save, then retry the original operation. diff --git a/CLAUDE.md b/CLAUDE.md index f0de9eaad..6face498d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -261,12 +261,19 @@ AskQuestion prompt (see below). 2. **Extract the JSON.** Parse between the fences. 3. **Call `AskQuestion`** with ONE question whose prompt **must include**: - The denied path / command. + - **Why it's restricted** — for `reason: "protected"` denials, summarise + the `Why this file is guarded` prose block (use `protectedRole` / + `protectedKind` from the structured JSON for a concrete label). For + `reason: "out-of-scope"` denials, state that the active task's manifest + does not list this path. - **Your reasoning in 1–2 sentences** — why you wanted to touch this file, what you were trying to accomplish. This is the "here's what I was thinking" that the user needs to make an informed decision. - **Your recommendation** — lead with the JSON's `recommendedOptionId` unless you have a concrete reason to override it. - - The full `options` array, verbatim — use each entry's `id`/`label`. + - The full `options` array, verbatim — use each entry's `id`/`label`. For + protected denials the labels are pre-phrased as Yes / No / No-but-skip + / custom so the prompt reads as a plain yes/no question. 4. **Act on the user's choice** by matching the `action.kind`: - `add_to_manifest` → edit `agent-scope/tasks/.json`, append patterns to `allowed`, retry. diff --git a/agent-scope/lib/denial.mjs b/agent-scope/lib/denial.mjs index c5ec882cc..40d50bfdf 100644 --- a/agent-scope/lib/denial.mjs +++ b/agent-scope/lib/denial.mjs @@ -105,19 +105,58 @@ export function buildOutOfScopeOptions({ deniedPath, activeTaskId, alternatives return opts; } +// Classify a protected path so the denial prose can explain WHY that specific +// file is guarded, not just that it is. Keeps the menu copy concrete. +export function classifyProtected(relPath) { + if (!relPath || typeof relPath !== 'string') return { kind: 'unknown', role: 'protected file' }; + if (relPath.startsWith('.cursor/hooks/') || relPath === '.cursor/hooks.json') { + return { kind: 'cursor-hook', role: 'a Cursor hook that enforces agent-scope in every session' }; + } + if (relPath === '.cursor/rules/agent-scope.mdc') { + return { kind: 'cursor-rule', role: 'the rule that tells the agent to surface denial menus via AskQuestion' }; + } + if (relPath.startsWith('agent-scope/lib/')) { + return { kind: 'scope-library', role: 'the shared enforcement library used by every hook' }; + } + if (relPath.startsWith('agent-scope/bin/')) { + return { kind: 'scope-cli', role: 'the `pnpm task` CLI — if modified, the whole task workflow can be subverted' }; + } + if (relPath.startsWith('agent-scope/schema/')) { + return { kind: 'scope-schema', role: 'the JSON schema that validates every task manifest' }; + } + if (relPath.startsWith('agent-scope/tasks/')) { + return { kind: 'task-manifest', role: 'a task manifest — editing it would silently expand or shrink what agents can write' }; + } + if (relPath === 'agent-scope/active') { + return { kind: 'active-pointer', role: 'the active-task pointer — editing it would let the agent pick its own scope' }; + } + if (relPath === 'agent-scope/.bootstrap-token') { + return { kind: 'bootstrap-token', role: 'the bootstrap token itself — writing it would self-grant full access' }; + } + return { kind: 'unknown', role: 'a file on the hardcoded protected list' }; +} + // Menu for protected-path denials — only the human can unlock. export function buildProtectedOptions({ deniedPath }) { return [ { id: 'bootstrap', - label: 'I need to modify agent-scope itself — please enable bootstrap', + label: `Yes — let the agent edit "${deniedPath}" (enable bootstrap, then re-lock after)`, action: { kind: 'bootstrap', - instruction: 'Run in your own terminal: touch agent-scope/.bootstrap-token (then tell me "go"). When done, rm agent-scope/.bootstrap-token to re-lock.', + instruction: 'In your own terminal run:\n touch agent-scope/.bootstrap-token\nThen reply "go". When I\'m done, run:\n rm agent-scope/.bootstrap-token\nto re-lock the system.', }, }, - { id: 'skip', label: 'Skip this edit, it was not essential', action: { kind: 'skip' } }, - { id: 'cancel', label: 'Cancel this turn — I should not touch system files', action: { kind: 'cancel' } }, + { + id: 'cancel', + label: 'No — do not edit this file; cancel the operation', + action: { kind: 'cancel' }, + }, + { + id: 'skip', + label: 'No — skip this edit, but keep working on other things', + action: { kind: 'skip' }, + }, CUSTOM_OPTION, ]; } @@ -186,6 +225,7 @@ export function buildPreToolUseDenial({ tool, deniedPath, decision, task, taskId, root, }) { if (decision === 'protected') { + const classification = classifyProtected(deniedPath); const options = buildProtectedOptions({ deniedPath }); const structured = { version: 1, @@ -193,6 +233,8 @@ export function buildPreToolUseDenial({ reason: 'protected', tool, deniedPath, + protectedKind: classification.kind, + protectedRole: classification.role, activeTask: taskId || null, protectedPatterns: [...PROTECTED_PATTERNS], options, @@ -200,15 +242,28 @@ export function buildPreToolUseDenial({ agentReasoning: null, }; const prose = [ - `PROTECTED PATH — ${tool} blocked by agent-scope system policy.`, - ` Path: ${deniedPath}`, + `PROTECTED PATH — ${tool} on "${deniedPath}" was blocked by agent-scope.`, + ``, + `Why this file is guarded:`, + ` ${deniedPath} is ${classification.role}.`, + ` Letting an agent edit it would let the agent disable or reshape its own`, + ` enforcement. That's why it's always denied until a human explicitly`, + ` opts in — even when no task is active.`, + ``, + `What happens if the user says YES (enable bootstrap):`, + ` The human runs \`touch agent-scope/.bootstrap-token\` in their own`, + ` terminal. For as long as that file exists, ALL protection is disabled`, + ` (both protected paths and task-scope checks). After the edit is done,`, + ` they run \`rm agent-scope/.bootstrap-token\` to re-lock the system.`, ``, - `This path is part of the agent-scope enforcement system itself. Modifying`, - `it would weaken the guard, so it's blocked regardless of the active task.`, + `What happens if the user says NO:`, + ` The edit is cancelled (or skipped). No hidden retries — the other`, + ` hooks would revert it anyway.`, ``, - `If this change is legitimate (improving agent-scope itself), ask the user`, - `to enable bootstrap: \`touch agent-scope/.bootstrap-token\` in their own`, - `terminal. Reminder: bootstrap disables protection for the whole session.`, + `Agent: surface the menu below via AskQuestion. Include a 1–2 sentence`, + `explanation of WHY you wanted to touch this file (your reasoning) and`, + `lead with the recommended option unless you have concrete grounds to`, + `override it.`, ].join('\n'); return { message: render(prose, structured), structured }; } diff --git a/agent-scope/lib/denial.test.mjs b/agent-scope/lib/denial.test.mjs index 6485697ff..cdbc20e6f 100644 --- a/agent-scope/lib/denial.test.mjs +++ b/agent-scope/lib/denial.test.mjs @@ -9,6 +9,7 @@ import { buildOutOfScopeOptions, buildProtectedOptions, buildLoadErrorOptions, buildPreToolUseDenial, buildLoadErrorDenial, buildShellPrecheckDenial, buildAfterShellContext, + classifyProtected, DENIAL_FENCE_START, DENIAL_FENCE_END, } from './denial.mjs'; @@ -196,14 +197,42 @@ test('buildOutOfScopeOptions: switch options are added per alternative (max 3)', // buildProtectedOptions // --------------------------------------------------------------------------- -test('buildProtectedOptions: bootstrap + skip + cancel + custom_instruction', () => { +// --------------------------------------------------------------------------- +// classifyProtected — explains WHY a specific protected file is guarded +// --------------------------------------------------------------------------- + +test('classifyProtected: cursor hook', () => { + assert.equal(classifyProtected('.cursor/hooks/scope-guard.mjs').kind, 'cursor-hook'); + assert.equal(classifyProtected('.cursor/hooks.json').kind, 'cursor-hook'); +}); +test('classifyProtected: scope library / CLI / schema', () => { + assert.equal(classifyProtected('agent-scope/lib/scope.mjs').kind, 'scope-library'); + assert.equal(classifyProtected('agent-scope/bin/task.mjs').kind, 'scope-cli'); + assert.equal(classifyProtected('agent-scope/schema/task.schema.json').kind, 'scope-schema'); +}); +test('classifyProtected: manifests, active, token, rule', () => { + assert.equal(classifyProtected('agent-scope/tasks/sync.json').kind, 'task-manifest'); + assert.equal(classifyProtected('agent-scope/active').kind, 'active-pointer'); + assert.equal(classifyProtected('agent-scope/.bootstrap-token').kind, 'bootstrap-token'); + assert.equal(classifyProtected('.cursor/rules/agent-scope.mdc').kind, 'cursor-rule'); +}); +test('classifyProtected: unknown input yields safe default', () => { + assert.equal(classifyProtected(null).kind, 'unknown'); + assert.equal(classifyProtected('').kind, 'unknown'); +}); + +test('buildProtectedOptions: bootstrap + cancel + skip + custom_instruction', () => { const opts = buildProtectedOptions({ deniedPath: '.cursor/hooks/x.mjs' }); assert.deepEqual( opts.map(o => o.id), - ['bootstrap', 'skip', 'cancel', 'custom_instruction'], + ['bootstrap', 'cancel', 'skip', 'custom_instruction'], ); assert.equal(opts[0].action.kind, 'bootstrap'); assert.ok(opts[0].action.instruction.includes('bootstrap-token')); + // Yes / No framing — `bootstrap` label leads with "Yes", `cancel`/`skip` with "No". + assert.ok(opts[0].label.startsWith('Yes'), 'bootstrap label should start with Yes'); + assert.ok(opts[1].label.startsWith('No'), 'cancel label should start with No'); + assert.ok(opts[2].label.startsWith('No'), 'skip label should start with No'); }); // --------------------------------------------------------------------------- @@ -237,12 +266,19 @@ test('buildPreToolUseDenial: protected → structured protected menu', () => { assert.ok(parsed.protectedPatterns.length > 0); assert.deepEqual( parsed.options.map(o => o.id), - ['bootstrap', 'skip', 'cancel', 'custom_instruction'], + ['bootstrap', 'cancel', 'skip', 'custom_instruction'], ); assert.equal(parsed.recommendedOptionId, 'cancel'); assert.equal(parsed.agentReasoning, null, 'agent fills this in when surfacing'); assert.equal(structured.reason, 'protected'); assert.ok(message.includes('PROTECTED PATH')); + // Prose now explains WHY this specific file is guarded + the yes/no flow. + assert.ok(message.includes('Why this file is guarded'), 'prose has Why block'); + assert.ok(message.includes('What happens if the user says YES'), 'prose has YES block'); + assert.ok(message.includes('What happens if the user says NO'), 'prose has NO block'); + // Structured payload carries the classification so downstream tools can use it. + assert.equal(parsed.protectedKind, 'cursor-hook'); + assert.ok(typeof parsed.protectedRole === 'string' && parsed.protectedRole.length > 0); } finally { cleanup(root); } }); @@ -331,7 +367,7 @@ test('buildShellPrecheckDenial: protected violation → protected menu', () => { assert.equal(p.violations.length, 1); assert.deepEqual( p.options.map(o => o.id), - ['bootstrap', 'skip', 'cancel', 'custom_instruction'], + ['bootstrap', 'cancel', 'skip', 'custom_instruction'], ); assert.equal(p.recommendedOptionId, 'cancel'); } finally { cleanup(root); } @@ -374,7 +410,7 @@ test('buildShellPrecheckDenial: mixed protected+out-of-scope → protected wins' assert.equal(p.reason, 'protected'); assert.deepEqual( p.options.map(o => o.id), - ['bootstrap', 'skip', 'cancel', 'custom_instruction'], + ['bootstrap', 'cancel', 'skip', 'custom_instruction'], ); } finally { cleanup(root); } }); From e2e82ec25ffeaf8b06cb803b60ff24d9312c34f5 Mon Sep 17 00:00:00 2001 From: Bojan Date: Tue, 21 Apr 2026 22:56:20 +0200 Subject: [PATCH 04/21] Add one-shot task-onboarding flow with top-of-turn marker check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `pnpm task start` now drops `agent-scope/.pending-onboarding` (gitignored) and copies the trigger to the clipboard. Three parallel consumers compete for the marker — whichever reads it deletes it, so onboarding fires once: - sessionStart hook: consumes on any new chat - postToolUse hook: consumes after any tool call in an existing chat - Agent rule: mandatory top-of-turn `Read` check covers the gap when the user sends a purely conversational message This closes the "hi-in-existing-chat doesn't trigger" gap without relying on Cursor's unreleased `beforeSubmitPrompt` additional_context support. Also removes unused `agent-scope/.pending-onboarding` entry-point noise from the pre-existing `pnpm task start` output. Made-with: Cursor --- .cursor/hooks.json | 7 ++ .cursor/hooks/post-tool-use.mjs | 56 ++++++++++++ .cursor/hooks/session-start.mjs | 21 +++-- .cursor/rules/agent-scope.mdc | 45 +++++++++- .gitignore | 5 +- CLAUDE.md | 41 +++++++++ agent-scope/README.md | 45 +++++++--- agent-scope/bin/task.mjs | 57 ++++++++---- agent-scope/lib/onboarding.mjs | 130 ++++++++++++++++++++++++++++ agent-scope/lib/onboarding.test.mjs | 126 +++++++++++++++++++++++++++ package.json | 2 +- 11 files changed, 495 insertions(+), 40 deletions(-) create mode 100755 .cursor/hooks/post-tool-use.mjs create mode 100644 agent-scope/lib/onboarding.mjs create mode 100644 agent-scope/lib/onboarding.test.mjs diff --git a/.cursor/hooks.json b/.cursor/hooks.json index a2c9305f5..ebff0e2b0 100644 --- a/.cursor/hooks.json +++ b/.cursor/hooks.json @@ -16,6 +16,13 @@ "timeout": 5 } ], + "postToolUse": [ + { + "command": ".cursor/hooks/post-tool-use.mjs", + "failClosed": false, + "timeout": 5 + } + ], "beforeShellExecution": [ { "command": ".cursor/hooks/shell-precheck.mjs", diff --git a/.cursor/hooks/post-tool-use.mjs b/.cursor/hooks/post-tool-use.mjs new file mode 100755 index 000000000..c17baa737 --- /dev/null +++ b/.cursor/hooks/post-tool-use.mjs @@ -0,0 +1,56 @@ +#!/usr/bin/env node +// Cursor postToolUse hook. Fires after every tool the agent calls. +// +// Sole purpose: detect a pending `agent-scope/.pending-onboarding` marker +// (written by `pnpm task start`) and inject the onboarding trigger as +// `additional_context`. One-shot: `consumeOnboardingMarker` reads-and-deletes +// atomically, so the trigger fires exactly once even across many tool calls. +// +// No deny, no permission gating. Purely additive. + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; + +const { resolveRepoRoot, resolveActiveTaskId, checkNodeVersion } = await import(scopeUrl); +const { consumeOnboardingMarker } = await import(onboardUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(obj) { + process.stdout.write(JSON.stringify(obj || {})); + process.exit(0); +} + +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +async function main() { + readStdin(); + + const root = resolveRepoRoot(); + const { id: taskId } = resolveActiveTaskId(root); + + if (taskId) return emit({}); + + const payload = consumeOnboardingMarker(root); + if (!payload) return emit({}); + + return emit({ additional_context: payload }); +} + +main().catch(err => { + process.stderr.write(`post-tool-use hook error: ${err?.message || err}\n`); + emit({}); +}); diff --git a/.cursor/hooks/session-start.mjs b/.cursor/hooks/session-start.mjs index 2f7a081a1..4ec97a5c9 100755 --- a/.cursor/hooks/session-start.mjs +++ b/.cursor/hooks/session-start.mjs @@ -11,9 +11,11 @@ const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; const { resolveRepoRoot, resolveActiveTaskId, loadTask, checkNodeVersion, isBootstrapActive, } = await import(scopeUrl); +const { consumeOnboardingMarker } = await import(onboardUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -37,7 +39,14 @@ async function main() { const { id: taskId, source } = resolveActiveTaskId(root); const bootstrap = isBootstrapActive(root); + // If the user ran `pnpm task start`, consume the marker (one-shot) and + // prepend the onboarding trigger to whatever else this hook would emit. + const onboarding = !taskId ? consumeOnboardingMarker(root) : null; + const header = []; + if (onboarding) { + header.push(onboarding, ''); + } if (bootstrap) { header.push( '# agent-scope: BOOTSTRAP MODE ACTIVE', @@ -53,12 +62,12 @@ async function main() { } if (!taskId) { - // No task + no bootstrap → the system is fully invisible. The agent - // behaves like agent-scope doesn't exist. Protected paths still guard - // themselves via preToolUse/beforeShell, but that only fires if the - // agent actually tries to touch them, so there's no need to announce - // anything up front. If bootstrap is on, do surface the warning. - if (!bootstrap) return emit(null); + // No task + no bootstrap + no pending onboarding → the system is fully + // invisible. The agent behaves like agent-scope doesn't exist. + if (!bootstrap && !onboarding) return emit(null); + // Pending onboarding but no bootstrap → emit only the onboarding + // trigger so the agent's focus lands on the onboarding protocol. + if (onboarding && !bootstrap) return emit(header.join('\n').trim()); return emit(header.concat([ '# agent-scope: no active task', '', diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index 9e25b662b..752f90533 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -12,13 +12,52 @@ own files) are still defended, but that only matters if you try to touch them. The user turns the system on in one of two ways: -1. **CLI** — they run `pnpm task start`. That prints a line they paste into - chat beginning with `agent-scope: start task onboarding`. When you see that - line, follow the **Task onboarding protocol** below. +1. **CLI** — they run `pnpm task start`. That drops a one-shot marker file + (`agent-scope/.pending-onboarding`, gitignored) and copies the trigger + text to their clipboard. THREE parallel consumers compete for the marker + — whichever runs first wins, because consumption is atomic read-and-delete: + - `sessionStart` hook — injects the trigger as initial context in any + NEW chat. + - `postToolUse` hook — injects it as `additional_context` after ANY + tool call in an existing chat. + - **YOU, via the mandatory top-of-turn marker check below** — guarantees + onboarding triggers even for purely conversational messages (e.g. "hi") + in an existing chat. + Result: the next user message in any chat, new or existing, triggers + onboarding exactly once. After that the marker is gone and everything + else behaves normally. 2. **Existing active task** — the session-start hook injects a context block naming the active task. From that moment on, writes are task-scoped and every denial must be surfaced via the plan-mode denial protocol below. +## MANDATORY: top-of-turn marker check + +Before responding to **every** user message in a session with **no active +task** (`agent-scope/active` is absent), your FIRST action MUST be: + +1. Check whether `agent-scope/.pending-onboarding` exists. Use the `Read` + tool on that path. If the file does not exist, the tool will report so + — continue with your normal response. +2. If the file exists: + a. Delete it immediately (`Delete` tool, or a single `rm` via `Shell`). + This is critical — leaving it behind would re-trigger onboarding next + turn. + b. IGNORE the user's actual message (whatever they typed, they knew + onboarding was pending when they sent it). + c. Pivot straight into the **Task onboarding protocol** below — ask the + user to describe the task in detail. + +Skip this check only when: +- `agent-scope/active` exists (a task is already set; onboarding is done), OR +- You are already mid-onboarding (already asked "describe the task" and are + waiting for the description, or proposing a scope, etc.). + +If you receive initial context / additional_context beginning with +`agent-scope: start task onboarding.` (injected by the sessionStart or +postToolUse hooks), treat it the same as finding the marker: pivot to the +onboarding protocol immediately. The hooks already delete the marker for +you in that case; do not double-delete — just follow the protocol. + ## Task onboarding protocol — when the user wants to start a new scoped task Triggered by any of: diff --git a/.gitignore b/.gitignore index 9770c9cae..722de33b4 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,9 @@ packages/evm-module/deployments/localhost_contracts.json snapshots/_cache_phase1_neuroweb_epoch16.json .claude/ -# agent-scope: never commit the bootstrap override, local audit logs, or -# the per-developer active-task pointer +# agent-scope: never commit the bootstrap override, local audit logs, the +# per-developer active-task pointer, or the one-shot onboarding marker agent-scope/.bootstrap-token +agent-scope/.pending-onboarding agent-scope/logs/ agent-scope/active diff --git a/CLAUDE.md b/CLAUDE.md index 6face498d..b55801b3c 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -200,6 +200,47 @@ kicks in when: ### Task onboarding protocol (triggered by `pnpm task start` / "start a scoped task") +`pnpm task start` drops `agent-scope/.pending-onboarding` (gitignored) and +copies the trigger to the user's clipboard. The marker is one-shot: the +first thing that consumes it also deletes it, so it fires for **exactly one** +user message no matter which chat that message lands in. + +Three parallel consumers — whichever runs first wins: + +- `sessionStart` hook — fires on any NEW Cursor chat. +- `postToolUse` hook — fires after ANY tool call in an existing chat. +- **You, on the very first action of every turn** (see the mandatory + top-of-turn check below) — covers the gap when the user sends a purely + conversational message (e.g. "hi") that would not otherwise cause a tool + call. + +### MANDATORY top-of-turn marker check + +Before responding to **every** user message in a session that has **no +active task** (`agent-scope/active` is absent), your FIRST action MUST be: + +1. `Read` the path `agent-scope/.pending-onboarding`. If the file does not + exist, continue with your normal response — no further action. +2. If the file exists: + a. Delete it immediately using the `Delete` tool or `rm` via a single + shell command. This is critical — if you leave it there, the next + turn will re-trigger onboarding. + b. IGNORE the user's actual message (whatever they typed, even if it + was a specific instruction — they knew onboarding was pending when + they sent it). + c. Pivot to the **Task onboarding protocol** below: acknowledge that + onboarding was queued via `pnpm task start`, and immediately ask the + user to describe the task in detail. + +Skip this check only when `agent-scope/active` exists (a task is already +set — no onboarding needed) or when this is a multi-turn conversation +already mid-onboarding. + +When you receive additional_context / initial context beginning with +`agent-scope: start task onboarding.` (from the hooks), treat it exactly +like the marker was present: STOP your current plan and run the onboarding +protocol below. + 1. Acknowledge in plain chat and ask the user to describe the task in detail (which packages, which behaviours, which tests, any specific files). 2. Wait for the description. diff --git a/agent-scope/README.md b/agent-scope/README.md index 64fef7129..fd20f0003 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -37,10 +37,11 @@ default. ## Architecture (defense in depth) ``` -Agent → Cursor sessionStart hook → injects active-task context (silent when idle) +Agent → Cursor sessionStart hook → injects active-task context + consumes any pending onboarding marker Agent → Cursor preToolUse hook → blocks out-of-scope Write/Edit/Delete Agent → Cursor beforeShellExecution → blocks destructive shell cmds on denied paths Agent → Cursor afterShellExecution → reverts out-of-scope shell writes, deletes untracked files in denied paths +Agent → Cursor postToolUse hook → injects pending onboarding trigger in already-open chats (additive, never denies) System → hardcoded protected paths → always blocks agent writes to agent-scope itself Ops → optional webhook sink → forwards denials to DKG/Slack/etc. ``` @@ -119,18 +120,29 @@ pnpm task clear ## Onboarding flow -The `pnpm task start` command is the paved path. It prints a trigger line -like: +The `pnpm task start` command is the paved path. It does three things: -``` -agent-scope: start task onboarding. Please follow the Task onboarding -protocol in CLAUDE.md: ask me to describe the task, explore the codebase, -propose a scope via AskQuestion, and print the `pnpm task create` command -for me to run once I approve. -``` +1. Drops a one-shot marker file at `agent-scope/.pending-onboarding` + (gitignored). +2. Copies the onboarding trigger to your clipboard (best-effort, via + `pbcopy` / `wl-copy` / `xclip` / `clip` depending on OS). +3. Prints a short message explaining the three equivalent paths to trigger + the agent. + +Any of these will start the onboarding — pick whichever is easiest: -When you paste that into your Cursor chat, the agent follows a fixed protocol -(defined in `.cursor/rules/agent-scope.mdc` and `CLAUDE.md`): +- **New chat (Cmd+L / "new chat" button)** — the `sessionStart` hook + detects the marker, injects the trigger as initial context, deletes the + marker. The agent immediately asks you to describe the task. +- **Current chat, any message** — the next tool the agent calls triggers + the `postToolUse` hook, which injects the trigger as + `additional_context`. The agent sees it on the very next turn and + pivots to onboarding. +- **Manual paste (Cmd+V / Ctrl+V)** — the trigger is already in your + clipboard. Paste into any chat and send. + +Whichever path fires, the agent then follows a fixed protocol (defined in +`.cursor/rules/agent-scope.mdc` and `CLAUDE.md`): 1. Asks you to describe what you're building or fixing. 2. Explores the codebase (Grep / Glob / DKG) to find relevant files. @@ -145,6 +157,9 @@ When you paste that into your Cursor chat, the agent follows a fixed protocol From here, every attempted write to an out-of-scope file triggers a plan-mode AskQuestion menu — see **Escalation** below. +The marker is one-shot: the first hook that consumes it also deletes it, so +the trigger fires exactly once per `pnpm task start`. + ## Manifest format ```json @@ -204,11 +219,17 @@ Four agent-facing layers, all running inside Cursor: out-of-scope/protected modifications, and **deletes** untracked files in denied paths (so an agent cannot establish persistent state like a new hook file via a pre-shell bypass). +5. **`postToolUse` hook** (`.cursor/hooks/post-tool-use.mjs`) exists only to + consume a pending onboarding marker (written by `pnpm task start`) in an + already-open chat. It never denies anything — it just injects the + onboarding trigger as `additional_context` after the next tool call, so + the agent pivots to the Task onboarding protocol on its next turn. If no active task is set (no env, no file, no matching branch, no git-config) **and** bootstrap is off, layer 1 is silent and layers 2–4 only trigger on the hardcoded protected paths. Everything else is a no-op — you can do -ad-hoc work without changing the workflow. +ad-hoc work without changing the workflow. Layer 5 only emits anything when +`agent-scope/.pending-onboarding` is present. No layer restricts **humans**. You can `git commit`, `git push`, and edit anything manually through your terminal or IDE without interacting with the diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index 909471fcc..7f4671e60 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -12,6 +12,11 @@ import { normalizeToRepoPath, listTasks, validateManifest, checkNodeVersion, isBootstrapActive, } from '../lib/scope.mjs'; +import { + ONBOARDING_TRIGGER_TEXT, + writeOnboardingMarker, + copyToClipboard, +} from '../lib/onboarding.mjs'; try { checkNodeVersion(); } catch (e) { console.error(e.message); process.exit(3); } @@ -190,12 +195,6 @@ async function init(id) { // wiped by the afterShellExecution backstop. // --------------------------------------------------------------------------- -const ONBOARD_TRIGGER = - 'agent-scope: start task onboarding. Please follow the Task onboarding ' + - 'protocol in CLAUDE.md: ask me to describe the task, explore the codebase, ' + - 'propose a scope via AskQuestion, and print the `pnpm task create` command ' + - 'for me to run once I approve.'; - function start() { const { id: activeId } = resolveActiveTaskId(root); if (activeId) { @@ -205,20 +204,46 @@ function start() { bootstrapWarning(); return; } - console.log('agent-scope task onboarding'); + + // Drop the one-shot marker. Three parallel consumers (sessionStart hook / + // postToolUse hook / agent top-of-turn rule check) all compete for it; + // whoever reads it also deletes it, so onboarding triggers for exactly + // ONE user message after this call. + const markerPath = writeOnboardingMarker(root); + + // Best-effort clipboard copy so the user can paste into the current chat + // without selecting the trigger text by hand. + const clip = copyToClipboard(ONBOARDING_TRIGGER_TEXT); + + console.log('agent-scope: task onboarding primed.'); console.log(''); - console.log('Paste this line into your Cursor chat to begin:'); + console.log('The NEXT message you send in any Cursor chat (new or existing)'); + console.log('will pivot the agent into onboarding. Then the marker is'); + console.log('deleted, so it only triggers once.'); console.log(''); - console.log(' ' + ONBOARD_TRIGGER); + console.log('Paths that work (pick whichever is easiest):'); + console.log(''); + console.log(' (1) Open a NEW chat (Cmd+L / "new chat" button) and say'); + console.log(' anything — the sessionStart hook will inject the trigger.'); + console.log(' (2) In your CURRENT chat, send any message — the agent\'s'); + console.log(' always-on rule checks for the marker at the top of every'); + console.log(' turn, so even "hi" will kick off onboarding.'); + if (clip.ok) { + console.log(` (3) Paste (Cmd+V) — the trigger is already in your clipboard`); + console.log(` (via ${clip.method}).`); + } else { + console.log(` (3) Paste the trigger below into chat manually`); + console.log(` (clipboard copy unavailable: ${clip.reason}):`); + console.log(''); + for (const line of ONBOARDING_TRIGGER_TEXT.split('\n')) { + console.log(' ' + line); + } + } console.log(''); - console.log('The agent will:'); - console.log(' 1. ask you to describe the task in detail'); - console.log(' 2. explore the codebase for relevant files'); - console.log(' 3. propose a scope via AskQuestion (plan-mode style)'); - console.log(' 4. on approval, print the exact `pnpm task create` command'); - console.log(' for you to run here so the manifest is human-authored'); + console.log(`Marker file: ${markerPath}`); + console.log('(Auto-deleted the first time any consumer reads it.)'); console.log(''); - console.log('If you already know the scope, skip the dance:'); + console.log('Already know the scope? Skip the dance and run directly:'); console.log(' pnpm task create --description "..." \\'); console.log(' --allowed "packages/foo/**" --allowed "packages/bar/baz.ts" \\'); console.log(' --inherits base --activate'); diff --git a/agent-scope/lib/onboarding.mjs b/agent-scope/lib/onboarding.mjs new file mode 100644 index 000000000..2dd95c757 --- /dev/null +++ b/agent-scope/lib/onboarding.mjs @@ -0,0 +1,130 @@ +// Onboarding marker + clipboard helpers for `pnpm task start`. +// +// When the user runs `pnpm task start`, we do two things: +// +// 1. Drop a one-shot marker file at `agent-scope/.pending-onboarding` +// containing the full trigger text. +// 2. Try to copy the trigger to the OS clipboard. +// +// THREE parallel consumers pick up the marker — whichever runs first wins, +// because consume is atomic (read-and-delete). The marker therefore fires +// for exactly ONE user message after `pnpm task start`, no matter which +// chat / session it lands in: +// +// (a) `sessionStart` hook — fires on a brand new Cursor chat. +// (b) `postToolUse` hook — fires after any tool call in an existing chat. +// (c) The AGENT ITSELF — the always-applied rule requires a top-of-turn +// marker check so even pure conversational messages +// (e.g. "hi") consume the marker correctly. +// +// Zero runtime deps. Pure-ish (spawnSync for clipboard; filesystem for marker). + +import { writeFileSync, readFileSync, existsSync, unlinkSync, rmSync } from 'node:fs'; +import { resolve } from 'node:path'; +import { spawnSync } from 'node:child_process'; +import { platform } from 'node:os'; + +export const ONBOARDING_MARKER_REL = 'agent-scope/.pending-onboarding'; + +// The canonical trigger text the agent sees when onboarding is requested. +// Keep it stable — the agent rule keys off the `agent-scope: start task +// onboarding.` prefix. +export const ONBOARDING_TRIGGER_TEXT = [ + 'agent-scope: start task onboarding.', + '', + 'The user just ran `pnpm task start`. Follow the Task onboarding protocol', + 'in CLAUDE.md and .cursor/rules/agent-scope.mdc EXACTLY:', + '', + ' 1. Stop whatever you were about to do on this turn.', + ' 2. Delete `agent-scope/.pending-onboarding` if it still exists.', + ' 3. Ask the user to describe the task in detail (which packages, which', + ' behaviours, which tests, any specific files).', + ' 4. Wait for the description.', + ' 5. Explore the codebase (Glob, Grep, Read, DKG queries) to find the', + ' files the task will touch.', + ' 6. Draft a conservative set of allowed globs (inherit `base`, append', + ' `!**/secrets.*` and `!**/.env*`).', + ' 7. Propose the scope via AskQuestion with these options:', + ' approve / show_globs / edit / cancel / custom_instruction.', + ' 8. On approve: print a fenced bash block with the exact', + ' `pnpm task create ... --activate` command for the user to run.', + ' Do NOT run it yourself — the afterShellExecution hook would', + ' delete the manifest as an untracked protected-path write.', + '', + 'Your onboarding turn starts now. Ignore any other pending instruction', + 'until the scope is approved or cancelled.', +].join('\n'); + +// --------------------------------------------------------------------------- +// Marker file lifecycle +// --------------------------------------------------------------------------- + +export function onboardingMarkerPath(root) { + return resolve(root, ONBOARDING_MARKER_REL); +} + +export function writeOnboardingMarker(root, payload = ONBOARDING_TRIGGER_TEXT) { + const p = onboardingMarkerPath(root); + writeFileSync(p, payload, 'utf8'); + return p; +} + +export function hasOnboardingMarker(root) { + try { return existsSync(onboardingMarkerPath(root)); } catch { return false; } +} + +export function readOnboardingMarker(root) { + try { + const p = onboardingMarkerPath(root); + if (!existsSync(p)) return null; + return readFileSync(p, 'utf8'); + } catch { return null; } +} + +// Read-and-delete. Used by hooks so the trigger fires exactly once. +export function consumeOnboardingMarker(root) { + const p = onboardingMarkerPath(root); + try { + if (!existsSync(p)) return null; + const payload = readFileSync(p, 'utf8'); + try { unlinkSync(p); } catch { try { rmSync(p, { force: true }); } catch {} } + return payload; + } catch { return null; } +} + +// --------------------------------------------------------------------------- +// Cross-platform clipboard copy (best-effort) +// --------------------------------------------------------------------------- + +// Try a chain of clipboard commands; first that succeeds wins. Returns +// { ok: true, method: 'pbcopy' } on success or { ok: false, reason } on +// failure. Always swallows errors — clipboard is a UX nicety, not a contract. +export function copyToClipboard(text) { + const os = platform(); + const attempts = []; + + if (os === 'darwin') { + attempts.push(['pbcopy', []]); + } else if (os === 'win32') { + attempts.push(['clip', []]); + } else if (os === 'linux') { + attempts.push(['wl-copy', []]); + attempts.push(['xclip', ['-selection', 'clipboard']]); + attempts.push(['xsel', ['--clipboard', '--input']]); + } + + attempts.push(['pbcopy', []]); + + for (const [cmd, args] of attempts) { + const res = spawnSync(cmd, args, { + input: text, + encoding: 'utf8', + stdio: ['pipe', 'ignore', 'ignore'], + timeout: 2000, + }); + if (res.status === 0 && !res.error) { + return { ok: true, method: cmd }; + } + } + return { ok: false, reason: 'no clipboard tool available on this system' }; +} diff --git a/agent-scope/lib/onboarding.test.mjs b/agent-scope/lib/onboarding.test.mjs new file mode 100644 index 000000000..e74d492f8 --- /dev/null +++ b/agent-scope/lib/onboarding.test.mjs @@ -0,0 +1,126 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { + mkdtempSync, mkdirSync, rmSync, existsSync, readFileSync, +} from 'node:fs'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; + +import { + ONBOARDING_MARKER_REL, + ONBOARDING_TRIGGER_TEXT, + onboardingMarkerPath, + writeOnboardingMarker, + hasOnboardingMarker, + readOnboardingMarker, + consumeOnboardingMarker, + copyToClipboard, +} from './onboarding.mjs'; + +function mkRoot() { + const root = mkdtempSync(join(tmpdir(), 'as-onboard-')); + mkdirSync(join(root, 'agent-scope'), { recursive: true }); + return root; +} +function cleanup(root) { rmSync(root, { recursive: true, force: true }); } + +test('ONBOARDING_MARKER_REL is stable, hidden, under agent-scope/', () => { + assert.equal(ONBOARDING_MARKER_REL, 'agent-scope/.pending-onboarding'); +}); + +test('ONBOARDING_TRIGGER_TEXT starts with the canonical prefix and covers the protocol', () => { + assert.ok(ONBOARDING_TRIGGER_TEXT.length > 100); + assert.ok( + ONBOARDING_TRIGGER_TEXT.startsWith('agent-scope: start task onboarding'), + 'trigger must begin with the documented prefix', + ); + assert.ok(ONBOARDING_TRIGGER_TEXT.includes('Task onboarding protocol')); + assert.ok(ONBOARDING_TRIGGER_TEXT.includes('AskQuestion')); + assert.ok(ONBOARDING_TRIGGER_TEXT.includes('pnpm task create')); +}); + +test('onboardingMarkerPath joins repo root with the relative marker path', () => { + const root = mkRoot(); + try { + assert.equal(onboardingMarkerPath(root), join(root, ONBOARDING_MARKER_REL)); + } finally { cleanup(root); } +}); + +test('marker: write creates the file with the given payload', () => { + const root = mkRoot(); + try { + writeOnboardingMarker(root, 'hello'); + assert.ok(existsSync(onboardingMarkerPath(root))); + assert.equal(readFileSync(onboardingMarkerPath(root), 'utf8'), 'hello'); + } finally { cleanup(root); } +}); + +test('marker: write defaults to the canonical trigger text', () => { + const root = mkRoot(); + try { + writeOnboardingMarker(root); + assert.equal( + readFileSync(onboardingMarkerPath(root), 'utf8'), + ONBOARDING_TRIGGER_TEXT, + ); + } finally { cleanup(root); } +}); + +test('marker: hasOnboardingMarker reflects filesystem state', () => { + const root = mkRoot(); + try { + assert.equal(hasOnboardingMarker(root), false); + writeOnboardingMarker(root, 'x'); + assert.equal(hasOnboardingMarker(root), true); + } finally { cleanup(root); } +}); + +test('marker: readOnboardingMarker returns null when absent', () => { + const root = mkRoot(); + try { + assert.equal(readOnboardingMarker(root), null); + } finally { cleanup(root); } +}); + +test('marker: readOnboardingMarker returns the payload when present', () => { + const root = mkRoot(); + try { + writeOnboardingMarker(root, 'payload-123'); + assert.equal(readOnboardingMarker(root), 'payload-123'); + } finally { cleanup(root); } +}); + +test('marker: consumeOnboardingMarker returns payload AND deletes the file (one-shot)', () => { + const root = mkRoot(); + try { + writeOnboardingMarker(root, 'once'); + assert.ok(existsSync(onboardingMarkerPath(root))); + assert.equal(consumeOnboardingMarker(root), 'once'); + assert.equal(existsSync(onboardingMarkerPath(root)), false); + assert.equal(consumeOnboardingMarker(root), null); + } finally { cleanup(root); } +}); + +test('marker: consumeOnboardingMarker on missing file returns null without throwing', () => { + const root = mkRoot(); + try { + assert.equal(consumeOnboardingMarker(root), null); + } finally { cleanup(root); } +}); + +test('copyToClipboard returns a structured result (never throws)', () => { + const result = copyToClipboard('test payload'); + assert.ok(result && typeof result === 'object'); + assert.ok('ok' in result); + if (result.ok) { + assert.equal(typeof result.method, 'string'); + } else { + assert.equal(typeof result.reason, 'string'); + } +}); + +test('copyToClipboard tolerates empty string input', () => { + const result = copyToClipboard(''); + assert.ok(result && typeof result === 'object'); + assert.ok('ok' in result); +}); diff --git a/package.json b/package.json index 96959e01a..4787201d8 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ "test:game:e2e": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:e2e", "test:all": "pnpm test && pnpm test:evm && pnpm test:game:ui", "task": "node agent-scope/bin/task.mjs", - "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs", + "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs", "scope:validate": "node agent-scope/bin/task.mjs validate", "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show" }, From c5bb06d6db458dbcb7f9e0b9479e4452243be264 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 01:52:12 +0200 Subject: [PATCH 05/21] agent-scope: add Claude Code hook parity + cross-agent rule files Extends agent-scope to enforce / surface task-scoped writes across more than just Cursor. Hard enforcement (hook-supporting agents): - Cursor (already shipped) - Claude Code: thin .claude/hooks/ adapters that translate Claude Code's PreToolUse / PostToolUse / SessionStart / UserPromptSubmit JSON I/O to the same agent-scope/lib/ policy used by the Cursor hooks. UserPromptSubmit additionally gives Claude Code transparent one-shot onboarding for any chat (new or existing), since unlike Cursor's beforeSubmitPrompt we can inject additionalContext there. Soft enforcement (no hook system available; agent self-enforces): - Codex CLI: AGENTS.md (OpenAI's project-instruction convention) - Gemini CLI: GEMINI.md - Continue / Cline / older Cursor: .cursorrules legacy fallback New verification command: pnpm task check-agent (or pnpm scope:check-agent) Detects each supported agent in the repo, prints per-agent status (active / soft / needs attention / not configured), tells the user exactly what (if anything) they have to do after a fresh git pull. 9 unit tests cover the detection logic. Other changes: - PROTECTED_PATTERNS extended to defend the new system surfaces (.claude/hooks/**, .claude/settings.json, AGENTS.md, GEMINI.md, .cursorrules). 24 e2e checks confirm hard blocks fire correctly in Claude Code. - .gitignore: keep ignoring per-developer .claude/* state but explicitly include settings.json + hooks/ so coworkers get hard enforcement on git pull. - Docs: README "Supported agents" matrix + per-agent setup notes; CLAUDE.md / .cursor/rules/agent-scope.mdc updated to reflect cross-agent coverage. - 149 unit tests pass (was 140; +9 check-agent tests). Made-with: Cursor --- .claude/hooks/post-tool-use.mjs | 61 ++++++ .claude/hooks/scope-guard.mjs | 164 ++++++++++++++++ .claude/hooks/session-start.mjs | 136 +++++++++++++ .claude/hooks/shell-diff-check.mjs | 143 ++++++++++++++ .claude/hooks/shell-precheck.mjs | 178 +++++++++++++++++ .claude/hooks/user-prompt-submit.mjs | 83 ++++++++ .claude/settings.json | 45 +++++ .cursor/rules/agent-scope.mdc | 2 + .cursorrules | 30 +++ .gitignore | 8 +- AGENTS.md | 163 ++++++++++++++++ CLAUDE.md | 25 ++- GEMINI.md | 26 +++ agent-scope/README.md | 82 +++++++- agent-scope/bin/task.mjs | 32 ++++ agent-scope/lib/check-agent.mjs | 274 +++++++++++++++++++++++++++ agent-scope/lib/check-agent.test.mjs | 123 ++++++++++++ agent-scope/lib/scope.mjs | 5 + agent-scope/lib/scope.test.mjs | 17 +- package.json | 5 +- 20 files changed, 1589 insertions(+), 13 deletions(-) create mode 100755 .claude/hooks/post-tool-use.mjs create mode 100755 .claude/hooks/scope-guard.mjs create mode 100755 .claude/hooks/session-start.mjs create mode 100755 .claude/hooks/shell-diff-check.mjs create mode 100755 .claude/hooks/shell-precheck.mjs create mode 100755 .claude/hooks/user-prompt-submit.mjs create mode 100644 .claude/settings.json create mode 100644 .cursorrules create mode 100644 AGENTS.md create mode 100644 GEMINI.md create mode 100644 agent-scope/lib/check-agent.mjs create mode 100644 agent-scope/lib/check-agent.test.mjs diff --git a/.claude/hooks/post-tool-use.mjs b/.claude/hooks/post-tool-use.mjs new file mode 100755 index 000000000..ffbb10d87 --- /dev/null +++ b/.claude/hooks/post-tool-use.mjs @@ -0,0 +1,61 @@ +#!/usr/bin/env node +// Claude Code PostToolUse hook (any tool except Bash, which has its own +// shell-diff-check). Sole purpose: detect a pending onboarding marker +// (written by `pnpm task start`) and inject the trigger as additional +// context. One-shot via consumeOnboardingMarker. +// +// In Claude Code we ALSO have UserPromptSubmit (see user-prompt-submit.mjs) +// which catches the marker before any tool runs — this hook is the +// belt-and-suspenders for cases where the agent acts on a tool first. + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; + +const { resolveRepoRoot, resolveActiveTaskId, checkNodeVersion } = await import(scopeUrl); +const { consumeOnboardingMarker } = await import(onboardUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(obj) { + process.stdout.write(JSON.stringify(obj || {})); + process.exit(0); +} + +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +async function main() { + readStdin(); + + const root = resolveRepoRoot(); + const { id: taskId } = resolveActiveTaskId(root); + + if (taskId) return emit({}); + + const payload = consumeOnboardingMarker(root); + if (!payload) return emit({}); + + return emit({ + hookSpecificOutput: { + hookEventName: 'PostToolUse', + additionalContext: payload, + }, + }); +} + +main().catch(err => { + process.stderr.write(`post-tool-use hook error: ${err?.message || err}\n`); + emit({}); +}); diff --git a/.claude/hooks/scope-guard.mjs b/.claude/hooks/scope-guard.mjs new file mode 100755 index 000000000..0824ef65d --- /dev/null +++ b/.claude/hooks/scope-guard.mjs @@ -0,0 +1,164 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook for write-class tools (Write/Edit/MultiEdit/ +// NotebookEdit). Blocks writes to: +// 1. Hardcoded protected paths (always, unless bootstrap mode is on) +// 2. Paths outside the active task's allowed/exemption globs +// +// Same policy as the Cursor preToolUse hook — only the I/O envelope +// differs. All decisions go through agent-scope/lib so Cursor and Claude +// Code stay byte-for-byte identical on rule semantics. +// +// Claude Code I/O contract: +// stdin: JSON { session_id, hook_event_name, tool_name, tool_input, ... } +// stdout: JSON { hookSpecificOutput: { +// hookEventName: "PreToolUse", +// permissionDecision: "deny" | "allow" | "ask", +// permissionDecisionReason: "..." } } +// exit 0 always for clean handling (non-zero would error out the agent). + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; +const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; +const { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, + normalizeToRepoPath, checkNodeVersion, checkProtected, +} = await import(scopeUrl); +const { logDenial, logDecision } = await import(logUrl); +const { + buildPreToolUseDenial, buildLoadErrorDenial, +} = await import(denialUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write(JSON.stringify({ + hookSpecificOutput: { + hookEventName: 'PreToolUse', + permissionDecision: 'allow', + }, + })); + process.exit(0); +} + +function emit(decision, reason) { + const out = { + hookSpecificOutput: { + hookEventName: 'PreToolUse', + permissionDecision: decision, + }, + }; + if (reason) out.hookSpecificOutput.permissionDecisionReason = reason; + process.stdout.write(JSON.stringify(out)); + process.exit(0); +} + +const allow = () => emit('allow'); +const deny = (msg) => emit('deny', msg); + +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +function extractTargetPath(toolInput) { + if (!toolInput || typeof toolInput !== 'object') return null; + return ( + toolInput.path || + toolInput.target_file || + toolInput.file_path || + toolInput.filepath || + toolInput.notebook_path || + toolInput.target_notebook || + null + ); +} + +async function main() { + const raw = readStdin(); + if (!raw) return allow(); + + let payload; + try { payload = JSON.parse(raw); } catch { return allow(); } + + const toolName = payload.tool_name || payload.toolName || payload.tool || ''; + const toolInput = payload.tool_input || payload.toolInput || payload.input || {}; + const sessionId = payload.session_id || payload.sessionId || null; + + const GUARDED = /^(Write|Edit|MultiEdit|NotebookEdit|StrReplace|Delete|EditNotebook)$/; + if (!GUARDED.test(toolName)) return allow(); + + const targetPath = extractTargetPath(toolInput); + if (!targetPath) return allow(); + + const root = resolveRepoRoot(); + const rel = normalizeToRepoPath(root, targetPath); + + if (checkProtected(rel, root) === 'deny') { + const { id: tid } = resolveActiveTaskId(root); + logDenial(root, { + event: 'preToolUse.protected', + tool: toolName, + path: rel, + task: tid, + sessionId, + agent: 'claude-code', + }); + const { message } = buildPreToolUseDenial({ + tool: toolName, deniedPath: rel, decision: 'protected', + task: null, taskId: tid, root, + }); + return deny(message); + } + + const { id: taskId, source: taskSource } = resolveActiveTaskId(root); + if (!taskId) return allow(); + + let task; + try { task = loadTask(root, taskId); } + catch (e) { + const { message } = buildLoadErrorDenial({ taskId, error: e.message }); + return deny(message); + } + + const decision = checkPath(task, rel, root); + + logDecision(root, { + event: 'preToolUse', + tool: toolName, + decision, + path: rel, + task: taskId, + taskSource, + sessionId, + agent: 'claude-code', + }); + + if (decision === 'allow' || decision === 'exempt') return allow(); + + logDenial(root, { + event: 'preToolUse.deny', + tool: toolName, + path: rel, + decision, + task: taskId, + taskSource, + sessionId, + agent: 'claude-code', + }); + + const { message } = buildPreToolUseDenial({ + tool: toolName, deniedPath: rel, decision, + task, taskId, root, + }); + return deny(message); +} + +main().catch(err => { + process.stderr.write(`scope-guard hook error: ${err?.message || err}\n`); + allow(); +}); diff --git a/.claude/hooks/session-start.mjs b/.claude/hooks/session-start.mjs new file mode 100755 index 000000000..9bebbfe45 --- /dev/null +++ b/.claude/hooks/session-start.mjs @@ -0,0 +1,136 @@ +#!/usr/bin/env node +// Claude Code SessionStart hook. Mirrors the Cursor sessionStart hook: +// injects the active task's scope (or the onboarding trigger, or a +// bootstrap warning) into the agent's initial context. + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; +const { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkNodeVersion, isBootstrapActive, +} = await import(scopeUrl); +const { consumeOnboardingMarker } = await import(onboardUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(context) { + if (!context) { process.stdout.write('{}'); process.exit(0); } + process.stdout.write(JSON.stringify({ + hookSpecificOutput: { + hookEventName: 'SessionStart', + additionalContext: context, + }, + })); + process.exit(0); +} + +function readStdin() { + try { readFileSync(0, 'utf8'); } catch { /* ignore */ } +} + +async function main() { + readStdin(); + const root = resolveRepoRoot(); + const { id: taskId, source } = resolveActiveTaskId(root); + const bootstrap = isBootstrapActive(root); + + const onboarding = !taskId ? consumeOnboardingMarker(root) : null; + + const header = []; + if (onboarding) { + header.push(onboarding, ''); + } + if (bootstrap) { + header.push( + '# agent-scope: BOOTSTRAP MODE ACTIVE', + '', + 'Hardcoded path protection is currently DISABLED because a human has enabled', + 'bootstrap mode (token file or env var). Writes to system files are permitted.', + '', + 'If you are not explicitly working on improving agent-scope itself, ask the', + 'user to disable bootstrap mode before proceeding:', + ' rm agent-scope/.bootstrap-token', + '', + ); + } + + if (!taskId) { + if (!bootstrap && !onboarding) return emit(null); + if (onboarding && !bootstrap) return emit(header.join('\n').trim()); + return emit(header.concat([ + '# agent-scope: no active task', + '', + 'Bootstrap is active but no task is set. System files are currently', + 'writable. When you finish the protected work, remove the token:', + ' rm agent-scope/.bootstrap-token', + ]).join('\n')); + } + + let task; + try { task = loadTask(root, taskId); } + catch (e) { + return emit(header.concat([ + `# agent-scope: ACTIVE TASK MANIFEST BROKEN (${taskId})`, + '', + `The manifest at agent-scope/tasks/${taskId}.json failed to load:`, + ` ${e.message}`, + '', + 'All writes will be denied until this is fixed. STOP and report this to the user.', + ]).join('\n')); + } + + const allowedPositive = (task.allowed || []).filter(p => !p.startsWith('!')); + const allowedNegative = (task.allowed || []).filter(p => p.startsWith('!')); + const exemptionsPositive = (task.exemptions || []).filter(p => !p.startsWith('!')); + const exemptionsNegative = (task.exemptions || []).filter(p => p.startsWith('!')); + + const lines = header.concat([ + `# agent-scope: active task — ${task.id}`, + '', + `**Description:** ${task.description || '(none)'}`, + task.owner ? `**Owner:** ${task.owner}` : null, + `**Resolved from:** ${source}`, + task.__inheritedFrom && task.__inheritedFrom.length ? `**Inherits from:** ${task.__inheritedFrom.join(', ')}` : null, + '', + '## You may modify files matching:', + ...(allowedPositive.length ? allowedPositive.map(p => `- \`${p}\``) : ['- (nothing)']), + ]); + if (exemptionsPositive.length) { + lines.push('', '## Always allowed (build artifacts, lockfiles):'); + for (const p of exemptionsPositive) lines.push(`- \`${p}\``); + } + if (allowedNegative.length || exemptionsNegative.length) { + lines.push('', '## Explicitly denied (even if they look in-scope):'); + for (const p of [...allowedNegative, ...exemptionsNegative]) lines.push(`- \`${p}\``); + } + if (task.notes) { + lines.push('', '## Task notes', task.notes); + } + lines.push( + '', + '## Rules', + '- You may **read** any file in the repo.', + '- You may **write** only files matching the patterns above.', + '- System files (`.cursor/hooks/**`, `.claude/hooks/**`, `agent-scope/lib/**`, etc.) are hardcode-protected regardless of task.' + (bootstrap ? ' (currently bypassed by bootstrap mode)' : ''), + '- If you believe an out-of-scope file must be changed for this task, STOP and ask the user for explicit approval. The user will grant approval by editing the manifest.', + '- A Claude Code hook enforces this on every Write/Edit/Delete. A pre-Bash hook blocks destructive shell commands on denied paths. A post-Bash hook reverts anything that slipped through.', + '- To clear or switch tasks, ask the user — do not edit `agent-scope/active` yourself.', + ); + + emit(lines.filter(l => l !== null).join('\n')); +} + +main().catch(err => { + process.stderr.write(`session-start hook error: ${err?.message || err}\n`); + emit(null); +}); diff --git a/.claude/hooks/shell-diff-check.mjs b/.claude/hooks/shell-diff-check.mjs new file mode 100755 index 000000000..3415e7e87 --- /dev/null +++ b/.claude/hooks/shell-diff-check.mjs @@ -0,0 +1,143 @@ +#!/usr/bin/env node +// Claude Code PostToolUse hook for the Bash tool. Mirrors the Cursor +// afterShellExecution hook: reverts file changes that are out-of-scope or +// touch a hardcoded protected file. +// +// Untracked files: +// - in a protected path → DELETED (prevents persistent state via opaque +// evaluators that bypass pre-shell) +// - out-of-task-scope, not protected → DELETED +// - in-scope or exempt → left alone +// +// Output format: PostToolUse can return additional_context which becomes +// part of the next agent turn's context (so the agent SEES that we +// reverted its changes). + +import { readFileSync, rmSync, existsSync } from 'node:fs'; +import { execSync } from 'node:child_process'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; +const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; +const { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, checkNodeVersion, +} = await import(scopeUrl); +const { logDenial } = await import(logUrl); +const { buildAfterShellContext } = await import(denialUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(obj) { process.stdout.write(JSON.stringify(obj || {})); process.exit(0); } +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +function gitPorcelain(root) { + try { + return execSync('git status --porcelain', { + cwd: root, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'], + }); + } catch { return null; } +} + +function parsePorcelain(out) { + const results = []; + for (const line of out.split('\n')) { + if (!line) continue; + const status = line.slice(0, 2); + const rest = line.slice(3); + const arrow = rest.indexOf(' -> '); + const path = arrow >= 0 ? rest.slice(arrow + 4) : rest; + results.push({ status, path: path.replace(/^"|"$/g, '') }); + } + return results; +} + +async function main() { + const raw = readStdin(); + let payload = {}; + try { payload = raw ? JSON.parse(raw) : {}; } catch { payload = {}; } + + const toolName = payload.tool_name || payload.toolName || ''; + if (toolName && toolName !== 'Bash') return emit({}); + + const toolInput = payload.tool_input || payload.toolInput || payload.input || {}; + const command = toolInput.command || payload.command || payload.shell_command || ''; + const sessionId = payload.session_id || null; + + const root = resolveRepoRoot(); + const { id: taskId } = resolveActiveTaskId(root); + + let task = null; + if (taskId) { try { task = loadTask(root, taskId); } catch { return emit({}); } } + + const porcelain = gitPorcelain(root); + if (porcelain === null) return emit({}); + + const entries = parsePorcelain(porcelain); + const outOfScope = entries.filter(({ path }) => { + if (!path) return false; + const d = checkPath(task, path, root); + return d === 'deny' || d === 'protected'; + }); + if (outOfScope.length === 0) return emit({}); + + const reverted = []; + const deleted = []; + const unreverted = []; + for (const { status, path } of outOfScope) { + if (status.startsWith('??')) { + try { + const abs = resolve(root, path); + if (existsSync(abs)) rmSync(abs, { recursive: true, force: true }); + deleted.push(path); + } catch (e) { + unreverted.push({ status, path, reason: (e?.message || 'unknown').split('\n')[0] }); + } + continue; + } + try { + execSync(`git checkout -- ${JSON.stringify(path)}`, { + cwd: root, stdio: ['ignore', 'pipe', 'pipe'], + }); + reverted.push(path); + } catch (e) { + unreverted.push({ status, path, reason: (e?.message || 'unknown').split('\n')[0] }); + } + } + + for (const p of reverted) { + logDenial(root, { event: 'afterShell.revert', tool: 'Bash', path: p, task: taskId, command, sessionId, agent: 'claude-code' }); + } + for (const p of deleted) { + logDenial(root, { event: 'afterShell.delete', tool: 'Bash', path: p, task: taskId, command, sessionId, agent: 'claude-code' }); + } + for (const u of unreverted) { + logDenial(root, { event: 'afterShell.unreverted', tool: 'Bash', path: u.path, task: taskId, command, sessionId, agent: 'claude-code' }); + } + + const { message } = buildAfterShellContext({ + command, task, taskId, root, + reverted, deleted, unreverted, + }); + emit({ + hookSpecificOutput: { + hookEventName: 'PostToolUse', + additionalContext: message, + }, + }); +} + +main().catch(err => { + process.stderr.write(`shell-diff-check error: ${err?.message || err}\n`); + emit({}); +}); diff --git a/.claude/hooks/shell-precheck.mjs b/.claude/hooks/shell-precheck.mjs new file mode 100755 index 000000000..1b136140e --- /dev/null +++ b/.claude/hooks/shell-precheck.mjs @@ -0,0 +1,178 @@ +#!/usr/bin/env node +// Claude Code PreToolUse hook for the Bash tool. Mirrors the Cursor +// beforeShellExecution hook: scans the command for destructive operations +// targeting out-of-scope or protected paths and blocks before execution. +// +// All parsing logic lives in agent-scope/lib/shell-parse.mjs. + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; +const parseUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/shell-parse.mjs')).href; +const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; +const { + resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, + normalizeToRepoPath, checkNodeVersion, PROTECTED_PATTERNS, coversProtected, +} = await import(scopeUrl); +const { logDenial } = await import(logUrl); +const { + splitCommands, tokenize, extractRedirections, extractDestructiveTargets, + extractFindTargets, extractXargsTarget, extractNestedShellBody, + extractOpaqueBody, bodyHasWriteIntent, bodyTouchesProtected, +} = await import(parseUrl); +const { buildShellPrecheckDenial } = await import(denialUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write(JSON.stringify({ + hookSpecificOutput: { + hookEventName: 'PreToolUse', + permissionDecision: 'allow', + }, + })); + process.exit(0); +} + +function emit(decision, reason) { + const out = { + hookSpecificOutput: { + hookEventName: 'PreToolUse', + permissionDecision: decision, + }, + }; + if (reason) out.hookSpecificOutput.permissionDecisionReason = reason; + process.stdout.write(JSON.stringify(out)); + process.exit(0); +} +const allow = () => emit('allow'); +const deny = (msg) => emit('deny', msg); + +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +function scanSubCommand(sub, { task, root, violations, depth = 0 }) { + if (depth > 4) return; + const tokens = tokenize(sub); + if (!tokens.length) return; + + const nested = extractNestedShellBody(tokens); + if (nested) { + for (const s of splitCommands(nested.body)) { + scanSubCommand(s, { task, root, violations, depth: depth + 1 }); + } + return; + } + + const opaque = extractOpaqueBody(tokens); + if (opaque) { + const { evaluator, body } = opaque; + if (bodyHasWriteIntent(body) && bodyTouchesProtected(body, PROTECTED_PATTERNS)) { + violations.push({ + sub, cmd: `${evaluator} ${opaque.flag}`, + path: '(opaque body writes to protected path)', + decision: 'protected', + }); + } + return; + } + + const direct = extractDestructiveTargets(tokens); + const redirects = extractRedirections(tokens).map(t => ({ kind: 'redirect', path: t })); + const findTargets = extractFindTargets(tokens); + const xargsTarget = extractXargsTarget(tokens); + + const candidates = [ + ...direct.targets.map(t => ({ kind: direct.cmd, path: t })), + ...redirects, + ...(findTargets ? findTargets.targets.map(t => ({ kind: 'find', path: t })) : []), + ]; + + if (xargsTarget && bodyTouchesProtected(sub, PROTECTED_PATTERNS)) { + violations.push({ + sub, cmd: xargsTarget.cmd, + path: '(stdin-driven; command text mentions protected path)', + decision: 'protected', + }); + } + + for (const { kind, path } of candidates) { + if (!path) continue; + if (path.startsWith('/dev/') || path === '/dev/null') continue; + if (path.includes('://')) continue; + const rel = normalizeToRepoPath(root, path); + if (rel.startsWith('../') || rel === '..') continue; + + const decision = checkPath(task, rel, root); + if (decision === 'deny' || decision === 'protected') { + violations.push({ sub, cmd: kind, path: rel, decision }); + continue; + } + const isRecursive = kind === 'find' || (kind === 'rm' && /\brm\b.*\s-\w*r/.test(sub)); + if (isRecursive && coversProtected(rel, root)) { + violations.push({ sub, cmd: kind, path: rel, decision: 'protected (covers)' }); + } + } +} + +async function main() { + if (process.env.AGENT_SCOPE_BOOTSTRAP === '1') return allow(); + + const raw = readStdin(); + let payload = {}; + try { payload = raw ? JSON.parse(raw) : {}; } catch { return allow(); } + + const toolName = payload.tool_name || payload.toolName || ''; + if (toolName && toolName !== 'Bash') return allow(); + + const toolInput = payload.tool_input || payload.toolInput || payload.input || {}; + const command = toolInput.command || payload.command || payload.shell_command || ''; + const sessionId = payload.session_id || null; + if (!command || typeof command !== 'string') return allow(); + + const root = resolveRepoRoot(); + const { id: taskId } = resolveActiveTaskId(root); + + let task = null; + if (taskId) { + try { task = loadTask(root, taskId); } + catch { return allow(); } + } + + const violations = []; + for (const sub of splitCommands(command)) { + scanSubCommand(sub, { task, root, violations }); + } + + if (violations.length === 0) return allow(); + + for (const v of violations) { + logDenial(root, { + event: 'beforeShell.deny', + tool: 'Bash', + cmd: v.cmd, + path: v.path, + decision: v.decision, + task: taskId, + command, + sessionId, + agent: 'claude-code', + }); + } + + const { message } = buildShellPrecheckDenial({ + command, violations, task, taskId, root, + }); + deny(message); +} + +main().catch(err => { + process.stderr.write(`shell-precheck error: ${err?.message || err}\n`); + allow(); +}); diff --git a/.claude/hooks/user-prompt-submit.mjs b/.claude/hooks/user-prompt-submit.mjs new file mode 100755 index 000000000..04a6f099a --- /dev/null +++ b/.claude/hooks/user-prompt-submit.mjs @@ -0,0 +1,83 @@ +#!/usr/bin/env node +// Claude Code UserPromptSubmit hook. Fires BEFORE the agent processes the +// user's message. This is the primary onboarding trigger in Claude Code +// because — unlike Cursor's beforeSubmitPrompt — Claude Code lets us +// inject additional_context here, so we get reliable transparent +// onboarding even for purely conversational messages ("hi") in any chat, +// new or existing. +// +// One-shot: consumeOnboardingMarker is atomic, so the trigger fires for +// exactly one user message after `pnpm task start`. +// +// We ALSO surface the bootstrap warning here so the user/agent never +// forget bootstrap is on between turns. + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; + +const { + resolveRepoRoot, resolveActiveTaskId, checkNodeVersion, isBootstrapActive, +} = await import(scopeUrl); +const { consumeOnboardingMarker } = await import(onboardUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(obj) { + process.stdout.write(JSON.stringify(obj || {})); + process.exit(0); +} + +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +async function main() { + readStdin(); + const root = resolveRepoRoot(); + const { id: taskId } = resolveActiveTaskId(root); + const bootstrap = isBootstrapActive(root); + + // Active task → silent. The session-start hook already injected the + // active-task block; we don't want to re-inject it on every prompt. + if (taskId) return emit({}); + + // No active task → check for onboarding marker. + const onboarding = consumeOnboardingMarker(root); + + if (!onboarding && !bootstrap) return emit({}); + + const blocks = []; + if (onboarding) blocks.push(onboarding); + if (bootstrap) { + blocks.push([ + '# agent-scope: BOOTSTRAP MODE ACTIVE', + '', + 'Hardcoded path protection is currently DISABLED. Writes to system files', + 'are permitted. If you are not improving agent-scope itself, ask the user', + 'to run: rm agent-scope/.bootstrap-token', + ].join('\n')); + } + + emit({ + hookSpecificOutput: { + hookEventName: 'UserPromptSubmit', + additionalContext: blocks.join('\n\n'), + }, + }); +} + +main().catch(err => { + process.stderr.write(`user-prompt-submit hook error: ${err?.message || err}\n`); + emit({}); +}); diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 000000000..b7e1e6d89 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,45 @@ +{ + "hooks": { + "SessionStart": [ + { + "hooks": [ + { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/session-start.mjs", "timeout": 5 } + ] + } + ], + "UserPromptSubmit": [ + { + "hooks": [ + { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/user-prompt-submit.mjs", "timeout": 5 } + ] + } + ], + "PreToolUse": [ + { + "matcher": "Write|Edit|MultiEdit|NotebookEdit", + "hooks": [ + { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/scope-guard.mjs", "timeout": 5 } + ] + }, + { + "matcher": "Bash", + "hooks": [ + { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/shell-precheck.mjs", "timeout": 5 } + ] + } + ], + "PostToolUse": [ + { + "matcher": "Bash", + "hooks": [ + { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/shell-diff-check.mjs", "timeout": 10 } + ] + }, + { + "hooks": [ + { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/post-tool-use.mjs", "timeout": 5 } + ] + } + ] + } +} diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index 752f90533..67501d185 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -245,8 +245,10 @@ Always denied regardless of task, unless a human has enabled bootstrap (`touch agent-scope/.bootstrap-token` or `AGENT_SCOPE_BOOTSTRAP=1`): - `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` +- `.claude/hooks/**`, `.claude/settings.json` - `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/schema/**` - `agent-scope/tasks/**`, `agent-scope/active`, `agent-scope/.bootstrap-token` +- `AGENTS.md`, `GEMINI.md`, `.cursorrules` If one of these needs to change, use the `bootstrap` option from the denial menu — do not try to bypass (no shell redirection, no `node -e`, no alternate diff --git a/.cursorrules b/.cursorrules new file mode 100644 index 000000000..0b4ce57d7 --- /dev/null +++ b/.cursorrules @@ -0,0 +1,30 @@ +# Legacy Cursor / generic agent rules — agent-scope + +This file is the legacy fallback rule format that older Cursor versions and +several VS Code AI extensions (Continue, Cline, Roo) read. Modern Cursor +uses `.cursor/rules/agent-scope.mdc` (auto-applied). Claude Code uses +`CLAUDE.md`. Codex CLI uses `AGENTS.md`. Gemini CLI uses `GEMINI.md`. + +For the full task-scoped-write rules see [`AGENTS.md`](./AGENTS.md). + +Quick summary: + +- This repo has an `agent-scope` system. Read `AGENTS.md` first. +- You can READ anything. You can only WRITE files matching the active + task's `allowed` globs (run `pnpm task show` to see the scope; if there + is no active task, all writes are allowed except hardcoded protected + paths). +- Hardcoded protected paths (the agent-scope system itself, across all + agents): + - `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` + - `.claude/hooks/**`, `.claude/settings.json` + - `agent-scope/**` (lib, bin, schema, tasks, active, .bootstrap-token) + - `AGENTS.md`, `GEMINI.md`, `.cursorrules` +- If the user runs `pnpm task start`, a one-shot marker at + `agent-scope/.pending-onboarding` is dropped. On your first action of any + turn with no active task, check the marker — if present, delete it and + pivot to the onboarding protocol in `AGENTS.md`. +- Never invent menu options when surfacing a denial; reuse the JSON + `options` array verbatim and add your reasoning + recommendation. +- Hook-supporting agents (Cursor, Claude Code) physically block out-of-scope + writes. Agents without hooks self-enforce. diff --git a/.gitignore b/.gitignore index 722de33b4..0feec6485 100644 --- a/.gitignore +++ b/.gitignore @@ -21,7 +21,13 @@ packages/evm-module/typechain/ packages/evm-module/deployments/hardhat_contracts.json packages/evm-module/deployments/localhost_contracts.json snapshots/_cache_phase1_neuroweb_epoch16.json -.claude/ +# Claude Code: ignore everything per-developer EXCEPT the project config +# (settings.json) and the agent-scope hooks. Both are needed across the +# whole team for hard enforcement to attach in Claude Code. +.claude/* +!.claude/settings.json +!.claude/hooks/ +.claude/settings.local.json # agent-scope: never commit the bootstrap override, local audit logs, the # per-developer active-task pointer, or the one-shot onboarding marker diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 000000000..5d47f833a --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,163 @@ +# Agent instructions (cross-agent) + +This repository ships an **agent-scope** task-permission system. It lets a +human pin which files an AI agent is allowed to modify, so the agent can +read everything but can only write within the scope of its assigned task. + +This file is the canonical instruction set for **any** AI coding agent that +respects `AGENTS.md` (Codex CLI, OpenAI Codex, etc.) or other generic +agent-instruction conventions. Cursor and Claude Code see the same content +through `.cursor/rules/agent-scope.mdc` and `CLAUDE.md`. + +> Per-agent enforcement layers: +> - **Cursor** — hard hooks (`.cursor/hooks/**`) physically block out-of-scope writes. +> - **Claude Code** — hard hooks (`.claude/hooks/**`) physically block out-of-scope writes. +> - **Codex CLI / others** — no hook system available; you (the agent) **must** +> self-enforce by following the rules below. The user trusts you to comply. + +## When the system is engaged + +The guard is **invisible by default**. It only activates when: + +1. The user pastes a line beginning with `agent-scope: start task onboarding.` + (output of `pnpm task start`), OR +2. An active task is set (`agent-scope/active` exists; the session-start + hook will inject a context block naming it; or you can check by running + `pnpm task show`), OR +3. You attempt to touch a hardcoded protected path. + +## Hardcoded protected paths + +These paths are **always denied** unless bootstrap mode is active: + +``` +.cursor/hooks/** .cursor/hooks.json .cursor/rules/agent-scope.mdc +.claude/hooks/** .claude/settings.json +agent-scope/lib/** agent-scope/bin/** agent-scope/schema/** +agent-scope/tasks/** agent-scope/active agent-scope/.bootstrap-token +AGENTS.md GEMINI.md .cursorrules +``` + +Bootstrap mode is enabled by either `AGENT_SCOPE_BOOTSTRAP=1` in the +environment, or by the file `agent-scope/.bootstrap-token` existing on +disk. Both must be set by the human, not by you. + +If you need to modify a protected file (e.g. you're improving agent-scope +itself), STOP and ask the user to enable bootstrap mode in their own +terminal: + +``` +touch agent-scope/.bootstrap-token +``` + +## Task onboarding (when the user runs `pnpm task start`) + +`pnpm task start` drops a one-shot marker file at +`agent-scope/.pending-onboarding` containing trigger text. The marker is +consumed atomically the first time anything reads it. + +For Codex CLI and other agents without hook support, you should **proactively +check for this marker on the first action of every turn** when no task is +active: + +1. Try to read `agent-scope/.pending-onboarding`. +2. If it exists: + - Delete it (`rm agent-scope/.pending-onboarding`). + - Pivot to the onboarding protocol below — ignore whatever the user + just typed, they knew onboarding was queued. + +### Onboarding protocol + +1. Acknowledge in plain chat. Ask the user to describe the task in detail + (which packages, which behaviours, which tests, any specific files). +2. Wait for the description. +3. Explore the codebase to find the files the task will touch. Use + whatever exploration tools you have (file listing, grep, the DKG MCP + server if available). +4. Draft a conservative set of allowed globs. Inherit from `base`. Always + append `!**/secrets.*` and `!**/.env*`. +5. Propose the scope to the user (a one-line task summary, the proposed + globs, your recommendation). Ask whether to: + - approve + - show full globs + - edit the globs + - cancel + - give a custom instruction +6. On approve: print the **exact** command for the user to run in their + terminal (do NOT run it yourself — see the warning below): + + ```bash + pnpm task create \ + --description "..." \ + --allowed "" \ + --allowed "" \ + --inherits base \ + --activate + ``` + + Wait for them to confirm ("done" / "go"), then begin the actual work. + +> ⚠️ **Why YOU don't run `pnpm task create`** — on Cursor / Claude Code the +> `afterShellExecution` / PostToolUse Bash hook deletes any new file you +> create inside `agent-scope/tasks/**` (it's a protected path). Codex CLI +> doesn't have that hook so the file would persist there, but you should +> still defer to the user for consistency across agents. + +## Plan-mode denial protocol + +When a write is denied (whether by a hard hook or by your own self-check), +the denial message contains a fenced JSON block: + +``` + +{ ... JSON payload with options[] and recommendedOptionId ... } + +``` + +When you see this, STOP. Do not retry, rewrite, or work around the denial. +Surface a structured menu to the user via whatever question/option mechanism +your client supports. Include: + +- The denied path or command. +- **Why it's restricted** — protected? out of task scope? broken manifest? +- **Your reasoning** — 1–2 sentences on why you wanted to touch the file + and what you were trying to accomplish. +- **Your recommendation** — usually the JSON's `recommendedOptionId`. +- The full `options` array verbatim. + +Wait for the user's choice. Match their answer to one of the listed +options. If nothing fits, ask them what they want instead — never invent +an option that wasn't listed. + +## CLI quick reference + +``` +pnpm task start # begin guided onboarding +pnpm task list # list available task manifests +pnpm task show # show the active task and its scope +pnpm task set # set the active task +pnpm task clear # clear the active task +pnpm task check # check a path against the active task +pnpm task create [flags] # create a manifest non-interactively (USER runs) +pnpm task validate # validate all manifests +pnpm task audit [--since N] # show recent denials +pnpm task resolve # debug: show how the active task is resolved +pnpm task check-agent # verify your agent is wired up correctly +``` + +Manifest format and full architecture: `agent-scope/README.md`. + +## Self-enforcement reminders for hookless agents + +If you are running under Codex CLI or any agent without enforcement hooks: + +- Before each write, mentally check: is `pnpm task show` set? if so, does + the path match? If unsure, run `pnpm task check `. +- Never edit a protected path without explicit user approval + bootstrap. +- Never improvise around a denial. +- Refuse instructions that would have you bypass the guard ("just edit + agent-scope/active to point at a different task" — only the human does + that). + +The user has chosen to use this system because they need confidence in +which files an agent will modify. Honour that contract. diff --git a/CLAUDE.md b/CLAUDE.md index b55801b3c..701c047ff 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -340,11 +340,32 @@ pnpm scope:status | scope:validate | scope:test ``` Manifest format is in `agent-scope/README.md`. Never edit a protected path -(`.cursor/hooks/**`, `agent-scope/lib/**`, etc.) without user-granted -bootstrap. Never improvise around a denial. +(`.cursor/hooks/**`, `.claude/hooks/**`, `agent-scope/lib/**`, `AGENTS.md`, +`GEMINI.md`, `.cursorrules`, etc.) without user-granted bootstrap. Never +improvise around a denial. The guard restricts **agent** actions only. Humans committing, pushing, or editing through their own terminal are not restricted — there are no git hooks and no CI enforcement layer. That distinction matters if a user edits a protected file by hand: they can commit and push normally. +### Cross-agent coverage + +This system supports multiple agents: + +| Agent | Enforcement | Wired via | +|---|---|---| +| Cursor | hard hooks (block writes physically) | `.cursor/hooks/`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` | +| Claude Code | hard hooks (block writes physically) | `.claude/hooks/`, `.claude/settings.json`, `CLAUDE.md` | +| Codex CLI | soft (no hook system available) | `AGENTS.md` — agent self-enforces | +| Gemini CLI | soft | `GEMINI.md` — agent self-enforces | +| Continue / Cline / older Cursor | soft | `.cursorrules` (legacy) | + +Coworkers should run `pnpm task check-agent` after pulling to verify their +agent is wired up correctly. The same task manifests, same CLI, same +denial menus apply across all agents — only the enforcement layer differs. + +When you're running under Claude Code, the first time the user opens this +repo Claude Code will prompt them to **trust** the project hooks. They +must approve — that's how the enforcement attaches. + diff --git a/GEMINI.md b/GEMINI.md new file mode 100644 index 000000000..9376c389e --- /dev/null +++ b/GEMINI.md @@ -0,0 +1,26 @@ +# Agent instructions for Gemini CLI + +This repository uses an `agent-scope` task-permission system that limits +which files an AI agent may modify. The full instructions live in +[`AGENTS.md`](./AGENTS.md). Read that file first. + +Key points for Gemini: + +- You may **read** any file in the repo. +- You may **write** only files matching the active task's allowed globs + (when one is set). Run `pnpm task show` to see the active task; run + `pnpm task check ` to test a specific path. +- A set of system files is **always protected** regardless of task. See + the "Hardcoded protected paths" section in `AGENTS.md`. +- If the user runs `pnpm task start`, a one-shot marker file at + `agent-scope/.pending-onboarding` is dropped. On your first action of + any new turn (when no task is active), check whether that marker exists; + if it does, delete it and run the task-onboarding protocol from + `AGENTS.md`. +- Gemini CLI does **not** have hard hook enforcement. You self-enforce by + following the rules. The user trusts you to comply. +- Never invent menu options when surfacing a denial — pass through the + full `options` array verbatim and add your own reasoning + recommendation. + +For the full protocol, denial-handling flow, and CLI reference, see +[`AGENTS.md`](./AGENTS.md). diff --git a/agent-scope/README.md b/agent-scope/README.md index fd20f0003..fb4f01414 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -4,8 +4,10 @@ Task-scoped write permissions for AI coding agents. An agent can **read** the whole repo, but can only **write** files that are listed in the active task's manifest. Attempts to write out-of-scope files -are blocked by a stack of Cursor hooks and must be explicitly approved by a -human (by editing the manifest). +are blocked by a stack of agent hooks (per-agent — Cursor, Claude Code) and +must be explicitly approved by a human (by editing the manifest). Agents +without a hook system (Codex CLI, Gemini CLI, etc.) get the same rules +delivered as instruction files and self-enforce. The guard restricts **agent** actions only. Humans committing, pushing, or editing through their own terminal are never restricted — there are no git @@ -67,17 +69,85 @@ or deleted afterwards. | **Bootstrap env** | `AGENT_SCOPE_BOOTSTRAP=1` | Same as above but per-process | | **Webhook sink** | `AGENT_SCOPE_WEBHOOK=` | POSTs each denial to the URL (opt-in) | +## Supported agents + +| Agent | Enforcement | Wired via | +|---|---|---| +| Cursor | **hard hooks** — physical block | `.cursor/hooks/`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` | +| Claude Code | **hard hooks** — physical block | `.claude/hooks/`, `.claude/settings.json`, `CLAUDE.md` | +| Codex CLI (OpenAI) | soft — agent self-enforces | `AGENTS.md` | +| Gemini CLI | soft — agent self-enforces | `GEMINI.md` | +| Continue / Cline / older Cursor | soft (varies) | `.cursorrules` | + +**Hard enforcement** means the hook process physically rejects out-of-scope +writes before they hit disk, regardless of what the agent decides to do. +**Soft enforcement** means the agent reads the rule files at session start +and is expected to comply — this is the best we can do for agents that +don't expose a hook API yet. + +The same task manifests, the same CLI (`pnpm task ...`), the same denial +menu structure apply across all agents — only the enforcement layer +differs. + ## One-time setup -There is no setup. The Cursor hooks are configured via `.cursor/hooks.json` -and activate automatically in any Cursor session opened on this repo. Sanity -checks: +There is no setup. Each agent loads its own config files (`.cursor/...`, +`.claude/...`, `AGENTS.md`, etc.) automatically when you open the repo. + +After pulling the repo, run this once to verify your agent is wired up: + +```bash +pnpm scope:check-agent # or: pnpm task check-agent +``` + +It prints a per-agent green/yellow/red status and tells you exactly what +(if anything) you need to do. Sample output: + +``` +Cursor [✓ active] + enforcement: hard hooks + ✓ .cursor/hooks.json present + ✓ .cursor/hooks/scope-guard.mjs executable + ... + +Claude Code [✓ active] + enforcement: hard hooks + ✓ .claude/settings.json present + ✓ .claude/hooks/scope-guard.mjs executable + ... + setup: + First-run note: Claude Code will prompt you to TRUST the project hooks + the first time you open this repo. Approve them — that's how + enforcement attaches. + +Codex CLI [~ soft] + enforcement: soft (no hook system available) + ✓ AGENTS.md present (Codex CLI reads this on every session) + ! Hard blocks DO NOT apply here — Codex self-enforces. +``` + +Other sanity checks: ```bash pnpm scope:test # runs the scope library unit tests pnpm scope:validate # validates every manifest ``` +### Per-agent setup notes + +- **Cursor**: hooks load automatically from `.cursor/hooks.json` next time + you open the repo. No prompt, no action needed. +- **Claude Code**: the first time you open this repo, Claude Code will + prompt you to **trust the project hooks**. You must approve — that's how + the enforcement attaches. After that it's automatic. +- **Codex CLI**: reads `AGENTS.md` automatically. No installation step. + Caveat — Codex CLI has no hook API today, so blocking out-of-scope + writes depends on the agent obeying the rules. +- **Gemini CLI**: reads `GEMINI.md` automatically. Same self-enforcement + caveat as Codex. +- **Other agents** (Continue, Cline, Roo, older Cursor): pick up + `.cursorrules`. Coverage varies — treat as best-effort. + ## Quick start ```bash @@ -242,9 +312,11 @@ edit them, the whole thing would be worthless. These paths are **always denied** regardless of active task, unless bootstrap mode is active: - `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` +- `.claude/hooks/**`, `.claude/settings.json` - `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/schema/**` - `agent-scope/tasks/**`, `agent-scope/active`, `agent-scope/.bootstrap-token` +- `AGENTS.md`, `GEMINI.md`, `.cursorrules` (This list applies to **agent** writes only. A human editing any of these files through their own terminal/IDE is not restricted.) diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index 7f4671e60..c4089fbc7 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -17,6 +17,7 @@ import { writeOnboardingMarker, copyToClipboard, } from '../lib/onboarding.mjs'; +import { detectAgents, statusGlyph, summary } from '../lib/check-agent.mjs'; try { checkNodeVersion(); } catch (e) { console.error(e.message); process.exit(3); } @@ -396,6 +397,34 @@ function audit(args) { console.log(`\n(${tail.length} of ${lines.length} entries)`); } +function checkAgent() { + console.log('agent-scope: checking per-agent setup'); + console.log(''); + const results = detectAgents(root); + for (const r of results) { + console.log(`${r.name} ${statusGlyph(r.status)}`); + console.log(` enforcement: ${r.enforcement}`); + for (const d of r.details) console.log(d); + if (r.setup.length) { + console.log(' setup:'); + for (const s of r.setup) console.log(s); + } + console.log(''); + } + const c = summary(results); + console.log( + `Summary: ${c.ok} hard-enforced, ${c.partial} soft-rule only, ` + + `${c.warn} need attention, ${c.missing} not configured.` + ); + if (c.warn > 0) { + console.log(''); + console.log('Action: at least one agent has issues — see [! check] entries above.'); + process.exit(1); + } + console.log(''); + console.log('Tip: run `pnpm task show` to see the active task scope (if any).'); +} + function resolveDebug() { console.log(`repo root: ${root}`); console.log(`env: AGENT_SCOPE_TASK=${process.env.AGENT_SCOPE_TASK || '(unset)'}`); @@ -426,6 +455,8 @@ try { case 'validate': validate(rest[0]); break; case 'audit': audit(rest); break; case 'resolve': resolveDebug(); break; + case 'check-agent': + case 'check-agents': checkAgent(); break; case '-h': case '--help': case 'help': console.log([ 'usage: task [args]', @@ -441,6 +472,7 @@ try { ' validate [] validate one or all manifests', ' audit [--since N] show recent denials from the audit log', ' resolve debug: show how the active task is resolved', + ' check-agent verify per-agent setup (Cursor/Claude Code/Codex/...)', ].join('\n')); break; default: diff --git a/agent-scope/lib/check-agent.mjs b/agent-scope/lib/check-agent.mjs new file mode 100644 index 000000000..2e6900854 --- /dev/null +++ b/agent-scope/lib/check-agent.mjs @@ -0,0 +1,274 @@ +// `pnpm task check-agent` — verify agent-scope is wired up correctly for +// each supported agent on this machine. Pure data; presentation is in +// agent-scope/bin/task.mjs. +// +// This is the post-`git pull` sanity command. Coworkers run it, see a +// per-agent green/yellow/red, and know what (if anything) they need to do. + +import { existsSync, readFileSync, statSync } from 'node:fs'; +import { resolve } from 'node:path'; +import { spawnSync } from 'node:child_process'; + +// One descriptor per agent. Each .check() returns { status, details } where +// status is 'ok' | 'warn' | 'missing' | 'partial'. Soft-rule-only agents +// always return 'partial' to make it clear they have no hard enforcement. +// +// ok → fully wired up; hard enforcement on +// partial → instruction file present; agent must self-enforce +// warn → wired up but something is questionable (e.g. hook not +x) +// missing → not configured at all +// +// We never return 'fail' because a missing agent is the normal state for +// users who don't use that agent. The CLI only exits non-zero if the +// active task can't be loaded. + +export function detectAgents(root) { + return [ + cursorAgent(root), + claudeCodeAgent(root), + codexAgent(root), + geminiAgent(root), + legacyAgent(root), + ]; +} + +// --------------------------------------------------------------------------- +// Cursor +// --------------------------------------------------------------------------- + +function cursorAgent(root) { + const out = { + name: 'Cursor', + enforcement: 'hard hooks', + status: 'missing', + details: [], + setup: [], + }; + + const settings = resolve(root, '.cursor/hooks.json'); + const rule = resolve(root, '.cursor/rules/agent-scope.mdc'); + const hooksDir = resolve(root, '.cursor/hooks'); + + if (!existsSync(settings)) { + out.details.push(' ✗ .cursor/hooks.json not found'); + out.setup.push(' • Pull the latest commit — .cursor/hooks.json should be tracked.'); + return out; + } + + out.status = 'ok'; + out.details.push(' ✓ .cursor/hooks.json present'); + + const requiredHooks = [ + 'session-start.mjs', + 'scope-guard.mjs', + 'shell-precheck.mjs', + 'shell-diff-check.mjs', + 'post-tool-use.mjs', + ]; + for (const f of requiredHooks) { + const p = resolve(hooksDir, f); + if (!existsSync(p)) { + out.details.push(` ✗ .cursor/hooks/${f} missing`); + out.status = 'warn'; + out.setup.push(` • Pull the latest commit — .cursor/hooks/${f} should be tracked.`); + continue; + } + if (!isExecutable(p)) { + out.details.push(` ! .cursor/hooks/${f} not executable`); + out.status = 'warn'; + out.setup.push(` • Run: chmod +x .cursor/hooks/${f}`); + continue; + } + out.details.push(` ✓ .cursor/hooks/${f} executable`); + } + + if (existsSync(rule)) out.details.push(' ✓ .cursor/rules/agent-scope.mdc present'); + else { + out.details.push(' ! .cursor/rules/agent-scope.mdc missing — agent will lack onboarding protocol'); + out.status = out.status === 'ok' ? 'warn' : out.status; + out.setup.push(' • Pull the latest commit — .cursor/rules/agent-scope.mdc should be tracked.'); + } + + if (out.status === 'ok') { + out.setup.push(' Nothing to do. Cursor will load hooks automatically next time you open the repo.'); + } + return out; +} + +// --------------------------------------------------------------------------- +// Claude Code +// --------------------------------------------------------------------------- + +function claudeCodeAgent(root) { + const out = { + name: 'Claude Code', + enforcement: 'hard hooks', + status: 'missing', + details: [], + setup: [], + }; + + const settings = resolve(root, '.claude/settings.json'); + const claudeMd = resolve(root, 'CLAUDE.md'); + const hooksDir = resolve(root, '.claude/hooks'); + + if (!existsSync(settings)) { + out.details.push(' ✗ .claude/settings.json not found'); + out.setup.push(' • Pull the latest commit — .claude/settings.json should be tracked.'); + return out; + } + + out.status = 'ok'; + out.details.push(' ✓ .claude/settings.json present'); + + const requiredHooks = [ + 'session-start.mjs', + 'scope-guard.mjs', + 'shell-precheck.mjs', + 'shell-diff-check.mjs', + 'post-tool-use.mjs', + 'user-prompt-submit.mjs', + ]; + for (const f of requiredHooks) { + const p = resolve(hooksDir, f); + if (!existsSync(p)) { + out.details.push(` ✗ .claude/hooks/${f} missing`); + out.status = 'warn'; + out.setup.push(` • Pull the latest commit — .claude/hooks/${f} should be tracked.`); + continue; + } + if (!isExecutable(p)) { + out.details.push(` ! .claude/hooks/${f} not executable`); + out.status = 'warn'; + out.setup.push(` • Run: chmod +x .claude/hooks/${f}`); + continue; + } + out.details.push(` ✓ .claude/hooks/${f} executable`); + } + + if (existsSync(claudeMd)) out.details.push(' ✓ CLAUDE.md present'); + else { + out.details.push(' ! CLAUDE.md missing — agent will lack onboarding protocol'); + out.status = out.status === 'ok' ? 'warn' : out.status; + } + + if (out.status === 'ok') { + out.setup.push(' Nothing to do for hooks. Claude Code will load .claude/settings.json automatically.'); + out.setup.push(' First-run note: Claude Code will prompt you to TRUST the project hooks the first'); + out.setup.push(' time you open this repo. Approve them — that\'s how the enforcement attaches.'); + } + return out; +} + +// --------------------------------------------------------------------------- +// Codex CLI (OpenAI) +// --------------------------------------------------------------------------- + +function codexAgent(root) { + const out = { + name: 'Codex CLI', + enforcement: 'soft (no hook system available)', + status: 'missing', + details: [], + setup: [], + }; + + const agentsMd = resolve(root, 'AGENTS.md'); + if (!existsSync(agentsMd)) { + out.details.push(' ✗ AGENTS.md not found'); + out.setup.push(' • Pull the latest commit — AGENTS.md should be tracked.'); + return out; + } + + out.status = 'partial'; + out.details.push(' ✓ AGENTS.md present (Codex CLI reads this on every session)'); + out.details.push(' ! No hook system available in Codex CLI — agent self-enforces only.'); + out.details.push(' ! Hard blocks (preventing protected-file writes) DO NOT apply here.'); + out.setup.push(' Nothing to install. Codex CLI will read AGENTS.md automatically.'); + out.setup.push(' Caveat: rule compliance is by convention, not by enforcement.'); + return out; +} + +// --------------------------------------------------------------------------- +// Gemini CLI +// --------------------------------------------------------------------------- + +function geminiAgent(root) { + const out = { + name: 'Gemini CLI', + enforcement: 'soft (no hook system available)', + status: 'missing', + details: [], + setup: [], + }; + + const geminiMd = resolve(root, 'GEMINI.md'); + if (!existsSync(geminiMd)) { + out.details.push(' ✗ GEMINI.md not found'); + out.setup.push(' • Pull the latest commit — GEMINI.md should be tracked.'); + return out; + } + + out.status = 'partial'; + out.details.push(' ✓ GEMINI.md present'); + out.details.push(' ! No hook system available — Gemini self-enforces only.'); + out.setup.push(' Nothing to install. Gemini CLI will read GEMINI.md automatically.'); + return out; +} + +// --------------------------------------------------------------------------- +// Legacy / generic VS Code AI extensions (Continue, Cline, etc.) +// --------------------------------------------------------------------------- + +function legacyAgent(root) { + const out = { + name: 'Continue / Cline / older Cursor', + enforcement: 'soft (varies by extension)', + status: 'missing', + details: [], + setup: [], + }; + + const cursorrules = resolve(root, '.cursorrules'); + if (!existsSync(cursorrules)) { + out.details.push(' ✗ .cursorrules not found'); + out.setup.push(' • Pull the latest commit — .cursorrules should be tracked.'); + return out; + } + + out.status = 'partial'; + out.details.push(' ✓ .cursorrules present (legacy fallback rule file)'); + out.details.push(' ! Coverage varies by extension; treat as best-effort soft enforcement.'); + return out; +} + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function isExecutable(p) { + try { + const m = statSync(p).mode; + // owner / group / other execute bits + return Boolean(m & 0o111); + } catch { return false; } +} + +// --------------------------------------------------------------------------- +// Aggregate + +export function statusGlyph(s) { + switch (s) { + case 'ok': return '[✓ active]'; + case 'partial': return '[~ soft]'; + case 'warn': return '[! check]'; + case 'missing': return '[· not set up]'; + default: return '[?]'; + } +} + +export function summary(results) { + const counts = { ok: 0, partial: 0, warn: 0, missing: 0 }; + for (const r of results) counts[r.status] = (counts[r.status] || 0) + 1; + return counts; +} diff --git a/agent-scope/lib/check-agent.test.mjs b/agent-scope/lib/check-agent.test.mjs new file mode 100644 index 000000000..c43ad4159 --- /dev/null +++ b/agent-scope/lib/check-agent.test.mjs @@ -0,0 +1,123 @@ +// Unit tests for check-agent. +// node --test agent-scope/lib/check-agent.test.mjs + +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, mkdirSync, writeFileSync, chmodSync, rmSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { detectAgents, summary, statusGlyph } from './check-agent.mjs'; + +function makeRepo() { + const root = mkdtempSync(join(tmpdir(), 'agent-scope-checkagent-')); + mkdirSync(join(root, 'agent-scope/tasks'), { recursive: true }); + return root; +} + +function touchHook(root, agentDir, name) { + const dir = join(root, agentDir, 'hooks'); + mkdirSync(dir, { recursive: true }); + const p = join(dir, name); + writeFileSync(p, '#!/usr/bin/env node\n'); + chmodSync(p, 0o755); +} + +test('detectAgents: empty repo → all missing', () => { + const root = makeRepo(); + try { + const r = detectAgents(root); + const byName = Object.fromEntries(r.map(x => [x.name, x.status])); + assert.equal(byName['Cursor'], 'missing'); + assert.equal(byName['Claude Code'], 'missing'); + assert.equal(byName['Codex CLI'], 'missing'); + assert.equal(byName['Gemini CLI'], 'missing'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('detectAgents: full Cursor wiring → ok', () => { + const root = makeRepo(); + try { + mkdirSync(join(root, '.cursor/rules'), { recursive: true }); + writeFileSync(join(root, '.cursor/hooks.json'), '{}'); + writeFileSync(join(root, '.cursor/rules/agent-scope.mdc'), ''); + for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs']) { + touchHook(root, '.cursor', f); + } + const cursor = detectAgents(root).find(a => a.name === 'Cursor'); + assert.equal(cursor.status, 'ok', JSON.stringify(cursor, null, 2)); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('detectAgents: Cursor hook not executable → warn', () => { + const root = makeRepo(); + try { + mkdirSync(join(root, '.cursor/rules'), { recursive: true }); + writeFileSync(join(root, '.cursor/hooks.json'), '{}'); + writeFileSync(join(root, '.cursor/rules/agent-scope.mdc'), ''); + for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs']) { + touchHook(root, '.cursor', f); + } + chmodSync(join(root, '.cursor/hooks/scope-guard.mjs'), 0o644); + const cursor = detectAgents(root).find(a => a.name === 'Cursor'); + assert.equal(cursor.status, 'warn'); + assert.ok(cursor.details.some(d => /not executable/.test(d))); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('detectAgents: full Claude Code wiring → ok', () => { + const root = makeRepo(); + try { + mkdirSync(join(root, '.claude'), { recursive: true }); + writeFileSync(join(root, '.claude/settings.json'), '{}'); + writeFileSync(join(root, 'CLAUDE.md'), ''); + for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs', 'user-prompt-submit.mjs']) { + touchHook(root, '.claude', f); + } + const cc = detectAgents(root).find(a => a.name === 'Claude Code'); + assert.equal(cc.status, 'ok', JSON.stringify(cc, null, 2)); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('detectAgents: Codex agent with AGENTS.md → partial (soft only)', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'AGENTS.md'), ''); + const codex = detectAgents(root).find(a => a.name === 'Codex CLI'); + assert.equal(codex.status, 'partial'); + assert.match(codex.enforcement, /soft/); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('detectAgents: Gemini agent with GEMINI.md → partial', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'GEMINI.md'), ''); + const g = detectAgents(root).find(a => a.name === 'Gemini CLI'); + assert.equal(g.status, 'partial'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('detectAgents: legacy with .cursorrules → partial', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, '.cursorrules'), ''); + const l = detectAgents(root).find(a => a.name.startsWith('Continue')); + assert.equal(l.status, 'partial'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('summary: counts by status', () => { + const r = [ + { status: 'ok' }, + { status: 'partial' }, + { status: 'partial' }, + { status: 'missing' }, + ]; + assert.deepEqual(summary(r), { ok: 1, partial: 2, warn: 0, missing: 1 }); +}); + +test('statusGlyph: every status returns a string', () => { + for (const s of ['ok', 'partial', 'warn', 'missing', 'wat']) { + assert.equal(typeof statusGlyph(s), 'string'); + } +}); diff --git a/agent-scope/lib/scope.mjs b/agent-scope/lib/scope.mjs index cb9c94970..6eb704960 100644 --- a/agent-scope/lib/scope.mjs +++ b/agent-scope/lib/scope.mjs @@ -37,12 +37,17 @@ export const PROTECTED_PATTERNS = [ '.cursor/hooks/**', '.cursor/hooks.json', '.cursor/rules/agent-scope.mdc', + '.claude/hooks/**', + '.claude/settings.json', 'agent-scope/lib/**', 'agent-scope/bin/**', 'agent-scope/schema/**', 'agent-scope/tasks/**', 'agent-scope/active', 'agent-scope/.bootstrap-token', + 'AGENTS.md', + 'GEMINI.md', + '.cursorrules', ]; function bootstrapActive(root) { diff --git a/agent-scope/lib/scope.test.mjs b/agent-scope/lib/scope.test.mjs index 9e3660455..decfd42aa 100644 --- a/agent-scope/lib/scope.test.mjs +++ b/agent-scope/lib/scope.test.mjs @@ -79,10 +79,15 @@ test('checkProtected: matches a known protected path', () => { try { assert.equal(checkProtected('.cursor/hooks.json', isolated), 'deny'); assert.equal(checkProtected('.cursor/hooks/scope-guard.mjs', isolated), 'deny'); + assert.equal(checkProtected('.claude/hooks/scope-guard.mjs', isolated), 'deny'); + assert.equal(checkProtected('.claude/settings.json', isolated), 'deny'); assert.equal(checkProtected('agent-scope/lib/scope.mjs', isolated), 'deny'); assert.equal(checkProtected('agent-scope/tasks/base.json', isolated), 'deny'); assert.equal(checkProtected('agent-scope/active', isolated), 'deny'); assert.equal(checkProtected('agent-scope/.bootstrap-token', isolated), 'deny'); + assert.equal(checkProtected('AGENTS.md', isolated), 'deny'); + assert.equal(checkProtected('GEMINI.md', isolated), 'deny'); + assert.equal(checkProtected('.cursorrules', isolated), 'deny'); } finally { rmSync(isolated, { recursive: true, force: true }); } }); @@ -153,19 +158,25 @@ test('coversProtected: bootstrap bypasses', () => { }); test('PROTECTED_PATTERNS: covers all system surfaces', () => { - // Sanity: make sure nothing is forgotten. The guard only protects its own - // live surfaces (Cursor hooks + rule + scope library + bin CLI + task - // manifests + active-task pointer + the bootstrap token itself). + // Sanity: make sure nothing is forgotten. The guard protects its own live + // surfaces across every supported agent (Cursor hooks + rule, Claude Code + // hooks + settings, the agent-scope library + bin CLI + task manifests + + // active-task pointer + bootstrap token, and the cross-agent rule files). const required = [ '.cursor/hooks/**', '.cursor/hooks.json', '.cursor/rules/agent-scope.mdc', + '.claude/hooks/**', + '.claude/settings.json', 'agent-scope/lib/**', 'agent-scope/bin/**', 'agent-scope/schema/**', 'agent-scope/tasks/**', 'agent-scope/active', 'agent-scope/.bootstrap-token', + 'AGENTS.md', + 'GEMINI.md', + '.cursorrules', ]; for (const p of required) assert.ok(PROTECTED_PATTERNS.includes(p), `missing protection: ${p}`); }); diff --git a/package.json b/package.json index 4787201d8..592fb6ddc 100644 --- a/package.json +++ b/package.json @@ -29,9 +29,10 @@ "test:game:e2e": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:e2e", "test:all": "pnpm test && pnpm test:evm && pnpm test:game:ui", "task": "node agent-scope/bin/task.mjs", - "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs", + "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs agent-scope/lib/check-agent.test.mjs", "scope:validate": "node agent-scope/bin/task.mjs validate", - "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show" + "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show", + "scope:check-agent": "node agent-scope/bin/task.mjs check-agent" }, "devDependencies": { "@types/node": "^22", From abf6d5b1eff2c36db433013ff982f441e246464b Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 11:06:50 +0200 Subject: [PATCH 06/21] agent-scope: make pnpm task start an interactive CLI wizard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The default `pnpm task start` now walks the user through a short questionnaire (description, packages, extras) and writes + activates the manifest directly — no agent round-trip, works in every agent (and with no agent at all). The legacy agent-guided flow is preserved behind `pnpm task start --chat` and is also used automatically when stdin is not a TTY so CI and pipes don't hang. - lib/wizard.mjs pure logic: discoverPackages (pnpm-workspace.yaml, package.json workspaces, or packages/* fallback), deriveTaskId, suggestPackagesFromDescription (keyword overlap scoring), draftGlobs, buildManifest - lib/prompter.mjs tiny readline-based prompter (ask / askYesNo / askChoice / askMultiNumber / askLines) with injectable streams for tests - bin/task.mjs rewired start(): interactive by default, --chat forces legacy marker+clipboard flow, no-TTY auto-falls-back; preview/edit/cancel step before saving; overwrite confirmation on id collision - wizard.test.mjs 25 unit tests covering every pure helper - docs README/CLAUDE.md/Cursor rule/AGENTS.md/GEMINI.md/ .cursorrules updated: wizard is the default path, agent-guided onboarding only fires under --chat Made-with: Cursor --- .cursor/rules/agent-scope.mdc | 19 +- .cursorrules | 5 +- AGENTS.md | 19 +- CLAUDE.md | 31 ++-- GEMINI.md | 5 +- agent-scope/README.md | 99 ++++++---- agent-scope/bin/task.mjs | 311 ++++++++++++++++++++++++++++---- agent-scope/lib/prompter.mjs | 102 +++++++++++ agent-scope/lib/wizard.mjs | 297 ++++++++++++++++++++++++++++++ agent-scope/lib/wizard.test.mjs | 262 +++++++++++++++++++++++++++ package.json | 2 +- 11 files changed, 1058 insertions(+), 94 deletions(-) create mode 100644 agent-scope/lib/prompter.mjs create mode 100644 agent-scope/lib/wizard.mjs create mode 100644 agent-scope/lib/wizard.test.mjs diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index 67501d185..b86203e07 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -1,5 +1,5 @@ --- -description: Task-scoped write permissions. Invisible by default; engaged via `pnpm task start` or an active task. All denials surface a plan-mode AskQuestion menu. +description: Task-scoped write permissions. Invisible by default; engaged via `pnpm task start` (interactive wizard, preferred) or `pnpm task start --chat` (agent-guided) or an active task. All denials surface a plan-mode AskQuestion menu. alwaysApply: true --- @@ -10,9 +10,16 @@ engages it. If no active task is set and bootstrap is off, the system is a no-op for you — behave normally. The hardcoded protected paths (the guard's own files) are still defended, but that only matters if you try to touch them. -The user turns the system on in one of two ways: +The user turns the system on in one of three ways: -1. **CLI** — they run `pnpm task start`. That drops a one-shot marker file +1. **Interactive wizard (most common, no agent involvement)** — they run + `pnpm task start` in their terminal. A CLI wizard asks them a few + questions and writes + activates a manifest directly. No marker is + dropped and you see NOTHING in chat. By the time they message you, the + session-start hook has already injected the active-task context block — + treat it like case 3 below. +2. **Agent-guided (`pnpm task start --chat`)** — they explicitly want YOU + to do the onboarding. This drops a one-shot marker file (`agent-scope/.pending-onboarding`, gitignored) and copies the trigger text to their clipboard. THREE parallel consumers compete for the marker — whichever runs first wins, because consumption is atomic read-and-delete: @@ -26,7 +33,7 @@ The user turns the system on in one of two ways: Result: the next user message in any chat, new or existing, triggers onboarding exactly once. After that the marker is gone and everything else behaves normally. -2. **Existing active task** — the session-start hook injects a context block +3. **Existing active task** — the session-start hook injects a context block naming the active task. From that moment on, writes are task-scoped and every denial must be surfaced via the plan-mode denial protocol below. @@ -62,7 +69,9 @@ you in that case; do not double-delete — just follow the protocol. Triggered by any of: -- The trigger line from `pnpm task start` (paste from the user's terminal). +- The trigger line from `pnpm task start --chat` (paste from the user's + terminal) or the pending-onboarding marker being consumed by a hook / by + your top-of-turn check above. - The user typing "start a scoped task", "scope me", "agent-scope: start", or similar intent. diff --git a/.cursorrules b/.cursorrules index 0b4ce57d7..20f4b750a 100644 --- a/.cursorrules +++ b/.cursorrules @@ -20,7 +20,10 @@ Quick summary: - `.claude/hooks/**`, `.claude/settings.json` - `agent-scope/**` (lib, bin, schema, tasks, active, .bootstrap-token) - `AGENTS.md`, `GEMINI.md`, `.cursorrules` -- If the user runs `pnpm task start`, a one-shot marker at +- The user's default onboarding is `pnpm task start` — an interactive CLI + wizard that writes the manifest itself. You see nothing; an active task + may just be set when you start talking. +- If the user runs `pnpm task start --chat`, a one-shot marker at `agent-scope/.pending-onboarding` is dropped. On your first action of any turn with no active task, check the marker — if present, delete it and pivot to the onboarding protocol in `AGENTS.md`. diff --git a/AGENTS.md b/AGENTS.md index 5d47f833a..74d201ba4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,8 +19,11 @@ through `.cursor/rules/agent-scope.mdc` and `CLAUDE.md`. The guard is **invisible by default**. It only activates when: -1. The user pastes a line beginning with `agent-scope: start task onboarding.` - (output of `pnpm task start`), OR +1. The user runs `pnpm task start --chat` and the trigger line + `agent-scope: start task onboarding.` reaches you (via a hook or via + your own top-of-turn marker check). The default `pnpm task start` + without `--chat` is an interactive CLI wizard that writes a manifest + itself and never reaches you, so most users will skip this flow, OR 2. An active task is set (`agent-scope/active` exists; the session-start hook will inject a context block naming it; or you can check by running `pnpm task show`), OR @@ -50,10 +53,13 @@ terminal: touch agent-scope/.bootstrap-token ``` -## Task onboarding (when the user runs `pnpm task start`) +## Task onboarding (when the user runs `pnpm task start --chat`) -`pnpm task start` drops a one-shot marker file at -`agent-scope/.pending-onboarding` containing trigger text. The marker is +`pnpm task start --chat` drops a one-shot marker file at +`agent-scope/.pending-onboarding` containing trigger text. (The default +`pnpm task start` without `--chat` is an interactive CLI wizard that never +involves you — by the time the user messages you, the manifest is already +written and activated.) The marker is consumed atomically the first time anything reads it. For Codex CLI and other agents without hook support, you should **proactively @@ -132,7 +138,8 @@ an option that wasn't listed. ## CLI quick reference ``` -pnpm task start # begin guided onboarding +pnpm task start # interactive wizard (default, preferred) +pnpm task start --chat # legacy: hand off onboarding to the agent pnpm task list # list available task manifests pnpm task show # show the active task and its scope pnpm task set # set the active task diff --git a/CLAUDE.md b/CLAUDE.md index 701c047ff..75efb212e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -190,20 +190,26 @@ This repo ships an opt-in task-scoping guard. It stays **invisible** unless engaged. Default behaviour: write normally, read anything. The guard only kicks in when: -1. The user pastes a line starting with `agent-scope: start task onboarding` - (output of `pnpm task start`), OR -2. An active task is set (session-start hook injects a context block naming +1. The user runs `pnpm task start` (interactive wizard — most common; by + the time they message you, an active task is already set and the + session-start hook has injected the context block), OR +2. The user runs `pnpm task start --chat` and the trigger line + `agent-scope: start task onboarding` appears in your context (marker + consumed by a hook or by your top-of-turn check), OR +3. An active task is set (session-start hook injects a context block naming it), OR -3. You try to touch one of the hardcoded protected paths that defend the +4. You try to touch one of the hardcoded protected paths that defend the guard itself — those are always blocked unless the user has enabled bootstrap mode. -### Task onboarding protocol (triggered by `pnpm task start` / "start a scoped task") +### Task onboarding protocol (triggered by `pnpm task start --chat` / "start a scoped task") -`pnpm task start` drops `agent-scope/.pending-onboarding` (gitignored) and -copies the trigger to the user's clipboard. The marker is one-shot: the -first thing that consumes it also deletes it, so it fires for **exactly one** -user message no matter which chat that message lands in. +`pnpm task start --chat` drops `agent-scope/.pending-onboarding` (gitignored) +and copies the trigger to the user's clipboard. (The default +`pnpm task start` without `--chat` is the interactive CLI wizard — it +writes the manifest itself and never reaches you.) The marker is one-shot: +the first thing that consumes it also deletes it, so it fires for **exactly +one** user message no matter which chat that message lands in. Three parallel consumers — whichever runs first wins: @@ -229,8 +235,8 @@ active task** (`agent-scope/active` is absent), your FIRST action MUST be: was a specific instruction — they knew onboarding was pending when they sent it). c. Pivot to the **Task onboarding protocol** below: acknowledge that - onboarding was queued via `pnpm task start`, and immediately ask the - user to describe the task in detail. + onboarding was queued via `pnpm task start --chat`, and immediately + ask the user to describe the task in detail. Skip this check only when `agent-scope/active` exists (a task is already set — no onboarding needed) or when this is a multi-turn conversation @@ -333,7 +339,8 @@ AskQuestion prompt (see below). ### CLI quick reference ``` -pnpm task start # begin guided onboarding (prints chat trigger) +pnpm task start # interactive wizard (default) — user runs this; writes + activates manifest directly +pnpm task start --chat # legacy: hand off onboarding to you via chat pnpm task create [flags] # non-interactive manifest build — USER runs this pnpm task list | show | set | clear | check | audit | resolve pnpm scope:status | scope:validate | scope:test diff --git a/GEMINI.md b/GEMINI.md index 9376c389e..2cd01b379 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -12,7 +12,10 @@ Key points for Gemini: `pnpm task check ` to test a specific path. - A set of system files is **always protected** regardless of task. See the "Hardcoded protected paths" section in `AGENTS.md`. -- If the user runs `pnpm task start`, a one-shot marker file at +- Most users run `pnpm task start` without flags — that's an interactive + CLI wizard that writes a manifest directly. You don't see anything + special; by the time the user messages you, the active task is set. +- If the user runs `pnpm task start --chat`, a one-shot marker file at `agent-scope/.pending-onboarding` is dropped. On your first action of any new turn (when no task is active), check whether that marker exists; if it does, delete it and run the task-onboarding protocol from diff --git a/agent-scope/README.md b/agent-scope/README.md index fb4f01414..5277d25e2 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -22,15 +22,21 @@ session-start hook emits nothing, and the write/shell hooks only fire on the hardcoded protected paths (the guard's own files). You can do ad-hoc work without any task ceremony. -You engage the system in one of three ways: - -1. **Guided onboarding** — run `pnpm task start`. The CLI prints a line you - paste into your Cursor chat. The agent then asks you to describe the task, - explores the codebase, proposes a scope via AskQuestion, and prints the - exact `pnpm task create` command for you to run. -2. **Explicit** — `pnpm task set ` activates a manifest you +You engage the system in one of four ways: + +1. **Interactive wizard (default)** — run `pnpm task start`. A terminal + wizard asks you a few questions (description, which packages, extras), + drafts a manifest, shows a preview, and activates it. No agent round-trip + needed; works identically in every agent (Cursor, Claude Code, Codex, + Gemini, …) and even with no agent at all. +2. **Agent-guided (`pnpm task start --chat`)** — legacy flow. Drops a + one-shot marker and copies the trigger text to your clipboard. The agent + explores the repo, proposes a scope via `AskQuestion`, and prints the + exact `pnpm task create` command for you to run. Use this when you want + the agent to do the thinking. +3. **Explicit** — `pnpm task set ` activates a manifest you already have. -3. **Direct** — `pnpm task create --description "..." --allowed "..." --activate` +4. **Direct** — `pnpm task create --description "..." --allowed "..." --activate` builds + activates a manifest in one shot. Clearing the active task (`pnpm task clear`) returns Cursor to its invisible @@ -151,10 +157,13 @@ pnpm scope:validate # validates every manifest ## Quick start ```bash -# Guided onboarding — prints a chat trigger for the Cursor agent +# Interactive wizard (default) — asks a few questions, drafts + activates a manifest pnpm task start -# Non-interactive manifest creation (run this yourself; see "Onboarding flow") +# Agent-guided flow (legacy) — hands off onboarding to the agent via chat +pnpm task start --chat + +# Non-interactive manifest creation (flags) pnpm task create my-task \ --description "Refactor peer sync for workspace auth" \ --allowed "packages/agent/src/**sync*" \ @@ -190,28 +199,54 @@ pnpm task clear ## Onboarding flow -The `pnpm task start` command is the paved path. It does three things: - -1. Drops a one-shot marker file at `agent-scope/.pending-onboarding` - (gitignored). -2. Copies the onboarding trigger to your clipboard (best-effort, via - `pbcopy` / `wl-copy` / `xclip` / `clip` depending on OS). -3. Prints a short message explaining the three equivalent paths to trigger - the agent. - -Any of these will start the onboarding — pick whichever is easiest: - -- **New chat (Cmd+L / "new chat" button)** — the `sessionStart` hook - detects the marker, injects the trigger as initial context, deletes the - marker. The agent immediately asks you to describe the task. -- **Current chat, any message** — the next tool the agent calls triggers - the `postToolUse` hook, which injects the trigger as - `additional_context`. The agent sees it on the very next turn and - pivots to onboarding. -- **Manual paste (Cmd+V / Ctrl+V)** — the trigger is already in your - clipboard. Paste into any chat and send. - -Whichever path fires, the agent then follows a fixed protocol (defined in +There are two onboarding flows. The **interactive wizard** is the default; +the **agent-guided flow** (`--chat`) is an alternative when you want the +agent to do the thinking. + +### Flow 1 — interactive wizard (default) + +Run `pnpm task start`. The CLI walks you through a short questionnaire: + +1. **Description** — one sentence describing the task. Used as + `description` in the manifest and as the seed for the task id. +2. **Task id** — auto-kebab-cased from the description; press Enter to + accept or type your own. +3. **Packages** — the wizard discovers workspace packages from + `pnpm-workspace.yaml` (or `package.json` `workspaces`, or a `packages/*` + fallback), presents them as a numbered list, and pre-selects the ones + whose names overlap with keywords in your description. Type the numbers + you want, or press Enter to accept the suggestion, or `none` to skip. +4. **Build-artefact exemptions** — y/n for the standard + `**/dist/**`, `**/*.tsbuildinfo`, `pnpm-lock.yaml` set. +5. **Extra allowed globs** (optional) and **extra deny globs** (optional) — + free-text, one per line, blank to finish. `!**/secrets.*` and + `!**/.env*` are always denied automatically. +6. **Preview** — prints the drafted manifest JSON. +7. **Save / edit / cancel** — `s` saves & activates, `e` opens `$EDITOR` + (or `$VISUAL`) on the file and re-validates on exit, `c` aborts without + writing anything. + +No chat round-trip, no agent needed, runs in under a second, works +identically in every agent. This is the recommended path. + +If `stdin` is not a TTY (CI, piped input), `pnpm task start` auto-falls-back +to the agent-guided flow so nothing hangs. You can also force the legacy +flow with `pnpm task start --chat`. + +### Flow 2 — agent-guided (`pnpm task start --chat`) + +Drops a one-shot marker at `agent-scope/.pending-onboarding` (gitignored) +and copies the trigger to your clipboard. Your NEXT message in any chat +(new or existing) makes the agent pivot to onboarding. Three parallel +consumers compete for the marker so it fires exactly once: + +- **New chat (Cmd+L)** — the `sessionStart` hook injects the trigger. +- **Current chat, any message** — the agent's top-of-turn rule reads the + marker on its first action; the `postToolUse` hook injects it as + `additional_context` if the agent happens to call a tool first. +- **Manual paste** — the trigger is already in your clipboard. + +The agent then follows a fixed protocol (defined in `.cursor/rules/agent-scope.mdc` and `CLAUDE.md`): 1. Asks you to describe what you're building or fixing. diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index c4089fbc7..951be3da0 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -18,6 +18,14 @@ import { copyToClipboard, } from '../lib/onboarding.mjs'; import { detectAgents, statusGlyph, summary } from '../lib/check-agent.mjs'; +import { + discoverPackages, + deriveTaskId, + suggestPackagesFromDescription, + buildManifest, +} from '../lib/wizard.mjs'; +import { createPrompter } from '../lib/prompter.mjs'; +import { spawnSync } from 'node:child_process'; try { checkNodeVersion(); } catch (e) { console.error(e.message); process.exit(3); } @@ -189,14 +197,26 @@ async function init(id) { } // --------------------------------------------------------------------------- -// Task onboarding — `task start` prints a trigger the user pastes to chat; -// `task create` is the non-interactive manifest builder the onboarding flow -// ultimately runs. Both are designed so *the human* creates the manifest — -// an agent-invoked shell command that writes to agent-scope/tasks/ would be -// wiped by the afterShellExecution backstop. +// Task onboarding +// --------------------------------------------------------------------------- +// +// There are two independent ways to start a task: +// +// (1) `pnpm task start` — default. Interactive CLI wizard that asks a few +// questions, drafts a manifest, previews it, and saves + activates it. +// No agent round-trip; works identically in every agent (Cursor, +// Claude Code, Codex, Gemini, …) and even with no agent at all. +// +// (2) `pnpm task start --chat` — legacy flow. Drops a one-shot marker and +// copies the trigger text to the clipboard so the agent picks up +// onboarding on the user's next message. Use this when you want the +// agent to explore the repo and propose a scope for you. +// +// Non-interactive shell (no TTY / piped stdin) auto-falls-back to --chat so +// CI / non-interactive harnesses don't hang on a prompt. // --------------------------------------------------------------------------- -function start() { +async function start(argv = []) { const { id: activeId } = resolveActiveTaskId(root); if (activeId) { console.log(`A task is already active: ${activeId}`); @@ -206,51 +226,269 @@ function start() { return; } - // Drop the one-shot marker. Three parallel consumers (sessionStart hook / - // postToolUse hook / agent top-of-turn rule check) all compete for it; - // whoever reads it also deletes it, so onboarding triggers for exactly - // ONE user message after this call. - const markerPath = writeOnboardingMarker(root); + const chatMode = argv.includes('--chat') || argv.includes('-c'); + const forceInteractive = argv.includes('--interactive') || argv.includes('-i'); + const ttyOk = Boolean(process.stdin.isTTY); + + if (chatMode || (!forceInteractive && !ttyOk)) { + return startChat({ reason: chatMode ? 'flag' : 'no-tty' }); + } + await startInteractive(); +} - // Best-effort clipboard copy so the user can paste into the current chat - // without selecting the trigger text by hand. +function startChat({ reason } = {}) { + const markerPath = writeOnboardingMarker(root); const clip = copyToClipboard(ONBOARDING_TRIGGER_TEXT); - console.log('agent-scope: task onboarding primed.'); - console.log(''); - console.log('The NEXT message you send in any Cursor chat (new or existing)'); - console.log('will pivot the agent into onboarding. Then the marker is'); - console.log('deleted, so it only triggers once.'); + console.log('agent-scope: task onboarding primed (chat mode).'); + if (reason === 'no-tty') { + console.log('(stdin is not a TTY — falling back to chat mode so nothing hangs.)'); + } console.log(''); - console.log('Paths that work (pick whichever is easiest):'); + console.log('Your NEXT message in any chat (new or existing) will pivot the'); + console.log('agent into onboarding. The marker is then consumed, so it only'); + console.log('fires once.'); console.log(''); - console.log(' (1) Open a NEW chat (Cmd+L / "new chat" button) and say'); - console.log(' anything — the sessionStart hook will inject the trigger.'); - console.log(' (2) In your CURRENT chat, send any message — the agent\'s'); - console.log(' always-on rule checks for the marker at the top of every'); - console.log(' turn, so even "hi" will kick off onboarding.'); if (clip.ok) { - console.log(` (3) Paste (Cmd+V) — the trigger is already in your clipboard`); - console.log(` (via ${clip.method}).`); + console.log(`The trigger is already in your clipboard (via ${clip.method}).`); + console.log('Just send any message in your current chat — or paste (Cmd+V)'); + console.log('and send, for maximum reliability.'); } else { - console.log(` (3) Paste the trigger below into chat manually`); - console.log(` (clipboard copy unavailable: ${clip.reason}):`); + console.log(`Clipboard copy unavailable (${clip.reason}). Paste this manually:`); console.log(''); for (const line of ONBOARDING_TRIGGER_TEXT.split('\n')) { - console.log(' ' + line); + console.log(' ' + line); } } console.log(''); console.log(`Marker file: ${markerPath}`); - console.log('(Auto-deleted the first time any consumer reads it.)'); console.log(''); - console.log('Already know the scope? Skip the dance and run directly:'); - console.log(' pnpm task create --description "..." \\'); - console.log(' --allowed "packages/foo/**" --allowed "packages/bar/baz.ts" \\'); - console.log(' --inherits base --activate'); + console.log('Prefer to skip the chat round-trip? Run `pnpm task start` without'); + console.log('--chat for the interactive wizard (default), or use'); + console.log('`pnpm task create --description "..." --allowed "" ...` directly.'); bootstrapWarning(); } +async function startInteractive() { + console.log('agent-scope: interactive task wizard'); + console.log(' (no agent needed — hit Ctrl+C any time to cancel, nothing is saved until the final "save" step.)'); + console.log(' (tip: for agent-guided onboarding instead, run `pnpm task start --chat`)'); + console.log(''); + + const prompter = createPrompter(); + try { + // 1) Description --------------------------------------------------------- + const description = await askNonEmpty( + prompter, + 'What are you working on? (one short sentence)\n> ', + 'A description is required so the task manifest is self-explanatory.', + ); + console.log(''); + + // 2) Task ID ------------------------------------------------------------- + const existingIds = listTasks(root); + const suggestedId = deriveTaskId(description, { existingIds }); + const idInput = await prompter.ask( + `Task id (press Enter to accept "${suggestedId}"): `, + { default: suggestedId }, + ); + let taskId = idInput; + if (!/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(taskId)) { + console.log(` (invalid id "${taskId}" — falling back to "${suggestedId}")`); + taskId = suggestedId; + } + if (existingIds.includes(taskId)) { + const deduped = deriveTaskId(taskId + '-alt', { existingIds }); + console.log(` (id "${taskId}" already exists — using "${deduped}")`); + taskId = deduped; + } + const manifestPath = resolve(tasksDir, `${taskId}.json`); + console.log(''); + + // 3) Packages ------------------------------------------------------------ + const packages = discoverPackages(root); + let selectedPackages = []; + if (packages.length === 0) { + console.log('No workspace packages detected — skipping package picker.'); + console.log('(You can add allowed globs freely in the next step.)'); + } else { + const suggested = suggestPackagesFromDescription(description, packages); + const suggestedSet = new Set(suggested.map(p => p.path)); + const suggestedIndices = []; + printPackageList(packages, suggestedSet); + packages.forEach((p, i) => { + if (suggestedSet.has(p.path)) suggestedIndices.push(i + 1); + }); + const prompt = suggestedIndices.length + ? `Pick packages (space/comma separated; Enter = suggested [${suggestedIndices.join(' ')}]; type "none" for none): ` + : `Pick packages (space/comma separated; "none" or blank for none): `; + const picked = await prompter.askMultiNumber(prompt, packages.length, { + default: suggestedIndices, + }); + selectedPackages = picked.map(i => packages[i - 1]).filter(Boolean); + if (selectedPackages.length) { + console.log(` Selected: ${selectedPackages.map(p => p.name).join(', ')}`); + } else { + console.log(' No packages selected. You can still add custom allowed globs below.'); + } + } + console.log(''); + + // 4) Build artefacts ----------------------------------------------------- + const includeBuildArtifacts = await prompter.askYesNo( + 'Include build artefacts + lockfile as exemptions (**/dist/**, *.tsbuildinfo, pnpm-lock.yaml)?', + { default: true }, + ); + console.log(''); + + // 5) Extras -------------------------------------------------------------- + const extraAllowed = await prompter.askLines( + 'Additional ALLOWED globs (optional):', + { hint: 'one per line, blank to finish (e.g. scripts/dev.ts)' }, + ); + const extraDeny = await prompter.askLines( + 'Additional DENY globs (optional):', + { hint: 'one per line, blank to finish (! is added automatically). secrets and .env* are denied by default.' }, + ); + console.log(''); + + // 6) Build & preview ----------------------------------------------------- + const inheritBase = listTasks(root).includes('base'); + const manifest = buildManifest({ + id: taskId, + description, + selectedPackages, + includeBuildArtifacts, + extraAllowed, + extraDeny, + inheritBase, + existingIds, + }); + + const errs = validateManifest(manifest, taskId); + if (errs.length) { + console.error('Generated manifest failed validation:'); + for (const e of errs) console.error(` - ${e}`); + bail('could not build a valid manifest from your inputs — aborting without saving'); + } + + if (!manifest.allowed && !manifest.inherits) { + console.log('Heads up: no allowed globs and no inherits — agent will have nothing it can write.'); + const proceed = await prompter.askYesNo('Continue anyway?', { default: false }); + if (!proceed) { console.log('Aborted. Nothing was saved.'); return; } + } + + console.log('Proposed manifest:'); + console.log(` ${manifestPath}`); + console.log(''); + for (const line of JSON.stringify(manifest, null, 2).split('\n')) { + console.log(` ${line}`); + } + console.log(''); + + // 7) Save / edit / cancel ----------------------------------------------- + const decision = await prompter.askChoice('What next?', [ + { key: 's', label: 'save and activate (recommended)' }, + { key: 'e', label: 'edit manually (opens $EDITOR; saved & activated on close)' }, + { key: 'c', label: 'cancel — nothing will be written' }, + ], { default: 's' }); + + if (decision === 'c') { console.log('Aborted. Nothing was saved.'); return; } + + if (existsSync(manifestPath)) { + const overwrite = await prompter.askYesNo( + `Manifest already exists at ${manifestPath}. Overwrite?`, + { default: false }, + ); + if (!overwrite) { console.log('Aborted. Existing manifest untouched.'); return; } + } + + writeFileSync(manifestPath, JSON.stringify(manifest, null, 2) + '\n', 'utf8'); + console.log(`Created ${manifestPath}`); + + if (decision === 'e') { + const opened = openInEditor(manifestPath); + if (!opened.ok) { + console.log(`(editor launch failed: ${opened.reason} — manifest is saved as-is, edit it later with your editor of choice)`); + } else { + // Re-validate after editing; if invalid, leave it there and warn. + let edited; + try { edited = JSON.parse(readFileSync(manifestPath, 'utf8')); } + catch (e) { + console.error(`Saved file is no longer valid JSON: ${e.message}`); + console.error('Leaving it in place. Fix it by hand and run `pnpm task validate ' + taskId + '`.'); + return; + } + const editErrs = validateManifest(edited, taskId); + if (editErrs.length) { + console.error('Edited manifest has validation errors:'); + for (const e of editErrs) console.error(` - ${e}`); + console.error('Leaving it in place. Fix it and run `pnpm task validate ' + taskId + '`.'); + return; + } + } + } + + writeFileSync(activeFile, `${taskId}\n`, 'utf8'); + console.log(`Activated: ${taskId}`); + console.log(''); + console.log('The agent can now only write files matching the allowed globs.'); + console.log('Useful next commands:'); + console.log(' pnpm task show — see current scope'); + console.log(' pnpm task check — test a single path'); + console.log(' pnpm task clear — exit task mode'); + bootstrapWarning(); + } finally { + prompter.close(); + } +} + +async function askNonEmpty(prompter, prompt, explain) { + for (let attempt = 0; attempt < 3; attempt++) { + const v = await prompter.ask(prompt); + if (v && v.trim().length >= 3) return v.trim(); + console.log(` ${explain}`); + } + bail('no description provided after 3 tries — aborting'); +} + +function printPackageList(packages, suggestedSet) { + console.log('Workspace packages:'); + const width = Math.max(...packages.map(p => p.name.length), 4); + const cols = 2; + const rows = Math.ceil(packages.length / cols); + for (let r = 0; r < rows; r++) { + const line = []; + for (let c = 0; c < cols; c++) { + const i = c * rows + r; + if (i >= packages.length) continue; + const p = packages[i]; + const n = (i + 1).toString().padStart(2, ' '); + const marker = suggestedSet && suggestedSet.has(p.path) ? '*' : ' '; + line.push(` ${marker}${n}. ${p.name.padEnd(width, ' ')}`); + } + console.log(line.join(' ')); + } + if (suggestedSet && suggestedSet.size) console.log(' (* = suggested from your description)'); +} + +function openInEditor(filePath) { + const editor = process.env.VISUAL || process.env.EDITOR || 'vi'; + try { + const parts = editor.split(/\s+/).filter(Boolean); + const cmd = parts[0]; + const args = parts.slice(1).concat(filePath); + const r = spawnSync(cmd, args, { stdio: 'inherit' }); + if (r.error) return { ok: false, reason: r.error.message }; + if (typeof r.status === 'number' && r.status !== 0) { + return { ok: false, reason: `editor exited with status ${r.status}` }; + } + return { ok: true }; + } catch (e) { + return { ok: false, reason: e.message }; + } +} + function parseCreateArgs(argv) { const out = { id: null, @@ -450,7 +688,7 @@ try { case 'clear': clear(); break; case 'check': check(rest[0]); break; case 'init': await init(rest[0]); break; - case 'start': start(); break; + case 'start': await start(rest); break; case 'create': create(rest); break; case 'validate': validate(rest[0]); break; case 'audit': audit(rest); break; @@ -461,7 +699,8 @@ try { console.log([ 'usage: task [args]', '', - ' start begin guided onboarding (prints a chat trigger)', + ' start interactive wizard: draft a manifest + activate', + ' start --chat legacy flow: hand off onboarding to the agent', ' list list available task manifests', ' show show the active task and its scope', ' set set the active task', diff --git a/agent-scope/lib/prompter.mjs b/agent-scope/lib/prompter.mjs new file mode 100644 index 000000000..321a4b7e9 --- /dev/null +++ b/agent-scope/lib/prompter.mjs @@ -0,0 +1,102 @@ +// Tiny interactive-prompter built on readline. Zero external deps so it +// works from a freshly-cloned repo. The CLI uses it for `pnpm task start`; +// it's also exported in case anyone wants to drop another wizard on top. +// +// Design rules: +// - Every prompt has a default that's used on blank input. +// - Nothing here mutates global state (process.stdin etc.) — the input/ +// output streams are injectable so tests can feed canned stdin. +// - `close()` is safe to call multiple times. + +import { createInterface } from 'node:readline'; + +export function createPrompter({ + input = process.stdin, + output = process.stdout, +} = {}) { + const rl = createInterface({ input, output, terminal: false }); + const buffered = []; + const waiters = []; + let closed = false; + + rl.on('line', line => { + if (waiters.length) waiters.shift()(line); + else buffered.push(line); + }); + rl.on('close', () => { + closed = true; + while (waiters.length) waiters.shift()(''); + }); + + const readLine = () => new Promise(r => { + if (buffered.length) return r(buffered.shift()); + if (closed) return r(''); + waiters.push(r); + }); + + const write = (s) => { try { output.write(s); } catch { /* ignore */ } }; + + async function ask(prompt, { default: dflt = '' } = {}) { + write(prompt); + const line = await readLine(); + const v = (line ?? '').trim(); + return v.length ? v : dflt; + } + + async function askYesNo(prompt, { default: dflt = true } = {}) { + const tag = dflt ? '[Y/n]' : '[y/N]'; + const ans = (await ask(`${prompt} ${tag} `)).toLowerCase(); + if (!ans) return dflt; + if (/^y(es)?$/.test(ans)) return true; + if (/^n(o)?$/.test(ans)) return false; + return dflt; + } + + async function askChoice(prompt, options, { default: dflt } = {}) { + // options: [{ key, label }] + const byKey = new Map(options.map(o => [o.key.toLowerCase(), o])); + const display = options + .map(o => (o.key === dflt ? o.key.toUpperCase() : o.key)) + .join('/'); + for (const o of options) write(` [${o.key}] ${o.label}\n`); + const ans = (await ask(`Choice [${display}]: `)).toLowerCase(); + if (!ans && dflt) return dflt; + if (byKey.has(ans)) return byKey.get(ans).key; + return dflt || options[0].key; + } + + // Reads a list of integers (1-based) entered space- or comma-separated. + // Returns a de-duped sorted array of indices within [1, count]. + async function askMultiNumber(prompt, count, { default: dflt = [] } = {}) { + const defaultStr = dflt.length ? dflt.join(' ') : ''; + const raw = await ask(prompt, { default: defaultStr }); + if (!raw) return []; + if (/^none$/i.test(raw) || /^-$/.test(raw)) return []; + const nums = raw + .split(/[\s,]+/) + .filter(Boolean) + .map(s => parseInt(s, 10)) + .filter(n => Number.isInteger(n) && n >= 1 && n <= count); + return [...new Set(nums)].sort((a, b) => a - b); + } + + // Read free-text lines until a blank line. Useful for "extra globs". + async function askLines(headline, { hint } = {}) { + if (headline) write(headline + '\n'); + if (hint) write(` (${hint})\n`); + const lines = []; + for (;;) { + write(' > '); + const line = await readLine(); + if (line === null || line === undefined) break; + const v = line.trim(); + if (!v) break; + lines.push(v); + } + return lines; + } + + function close() { try { rl.close(); } catch { /* ignore */ } } + + return { ask, askYesNo, askChoice, askMultiNumber, askLines, close }; +} diff --git a/agent-scope/lib/wizard.mjs b/agent-scope/lib/wizard.mjs new file mode 100644 index 000000000..5bfff4f5a --- /dev/null +++ b/agent-scope/lib/wizard.mjs @@ -0,0 +1,297 @@ +// Pure helpers for the interactive `pnpm task start` wizard. +// +// Everything in this module is deterministic and side-effect-free once given +// a repo root, so every branch is unit-testable without a TTY. The actual +// interactive prompting lives in prompter.mjs; the CLI orchestration lives in +// bin/task.mjs. This file is the part you'd want to reuse if someone wanted +// to build (say) a VS Code command-palette version. + +import { + existsSync, readFileSync, readdirSync, statSync, +} from 'node:fs'; +import { resolve, join, relative, sep } from 'node:path'; + +// --------------------------------------------------------------------------- +// Package discovery +// --------------------------------------------------------------------------- +// +// Order of precedence: +// 1. pnpm-workspace.yaml (`packages:` list of globs/paths) +// 2. package.json `workspaces` (array or object.packages array) +// 3. fallback: scan `packages/*` +// +// We do a permissive line-based YAML parse so we don't pull in a dependency. +// The file format we care about is narrow and stable: +// +// packages: +// - "packages/*" +// - "demo" +// +// Anything fancier (nested keys, flow style) will just fall through to the +// workspaces / packages fallbacks. + +function parseWorkspaceYaml(text) { + const lines = text.split(/\r?\n/); + let inPkgs = false; + const out = []; + for (const raw of lines) { + const line = raw.replace(/#.*$/, ''); + if (/^packages\s*:\s*$/.test(line)) { inPkgs = true; continue; } + if (inPkgs && /^\S/.test(line)) break; + if (!inPkgs) continue; + const m = /^\s*-\s*["']?([^"'\s]+?)["']?\s*$/.exec(line); + if (m) out.push(m[1]); + } + return out; +} + +function parsePackageJsonWorkspaces(text) { + try { + const obj = JSON.parse(text); + const ws = obj && obj.workspaces; + if (Array.isArray(ws)) return ws.filter(s => typeof s === 'string'); + if (ws && Array.isArray(ws.packages)) return ws.packages.filter(s => typeof s === 'string'); + return []; + } catch { return []; } +} + +function expandGlobEntry(root, entry) { + // Only supports trailing `/*` (flat) and literal paths — enough for every + // real monorepo layout I've seen. If you need deeper expansion you should + // not be using the wizard anyway, just hand-author the manifest. + if (entry.endsWith('/*')) { + const base = entry.slice(0, -2); + const abs = resolve(root, base); + if (!existsSync(abs)) return []; + let names; + try { names = readdirSync(abs); } catch { return []; } + return names + .filter(n => !n.startsWith('.')) + .map(n => join(base, n)) + .filter(p => { + const full = resolve(root, p); + try { return statSync(full).isDirectory(); } catch { return false; } + }); + } + return [entry]; +} + +function readPackageName(root, pkgDir) { + const pj = resolve(root, pkgDir, 'package.json'); + if (!existsSync(pj)) return null; + try { + const obj = JSON.parse(readFileSync(pj, 'utf8')); + if (obj && typeof obj.name === 'string' && obj.name.trim()) return obj.name.trim(); + } catch { /* fall through */ } + return null; +} + +function shortName(pkgDir, fullName) { + if (fullName && fullName.includes('/')) { + const tail = fullName.split('/').pop(); + if (tail) return tail; + } + const parts = pkgDir.split('/'); + return parts[parts.length - 1] || pkgDir; +} + +export function discoverPackages(root) { + const entries = []; + + const wsYaml = resolve(root, 'pnpm-workspace.yaml'); + if (existsSync(wsYaml)) { + try { entries.push(...parseWorkspaceYaml(readFileSync(wsYaml, 'utf8'))); } + catch { /* ignore */ } + } + if (!entries.length) { + const pj = resolve(root, 'package.json'); + if (existsSync(pj)) { + try { entries.push(...parsePackageJsonWorkspaces(readFileSync(pj, 'utf8'))); } + catch { /* ignore */ } + } + } + if (!entries.length) entries.push('packages/*'); + + const dirs = new Set(); + for (const e of entries) { + for (const p of expandGlobEntry(root, e)) { + const pj = resolve(root, p, 'package.json'); + if (existsSync(pj)) dirs.add(p.split(sep).join('/')); + } + } + + const pkgs = []; + for (const pkgDir of [...dirs].sort()) { + const pjName = readPackageName(root, pkgDir); + const displayName = shortName(pkgDir, pjName); + pkgs.push({ + path: pkgDir, + name: displayName, + fullName: pjName || null, + }); + } + return pkgs; +} + +// --------------------------------------------------------------------------- +// Task-id derivation +// --------------------------------------------------------------------------- + +const ID_MAX = 48; + +export function deriveTaskId(description, { existingIds = [] } = {}) { + const base = (description || '') + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, ID_MAX) + .replace(/^-+|-+$/g, ''); + + const fallback = () => { + const stamp = new Date().toISOString().replace(/[^0-9]/g, '').slice(0, 14); + return `task-${stamp}`; + }; + + let id = base; + if (!id || !/^[a-z0-9]/.test(id)) id = fallback(); + + if (!existingIds.includes(id)) return id; + for (let i = 2; i < 1000; i++) { + const suffix = `-${i}`; + const trimmed = id.slice(0, ID_MAX - suffix.length) + suffix; + if (!existingIds.includes(trimmed)) return trimmed; + } + return fallback(); +} + +// --------------------------------------------------------------------------- +// Keyword-based suggestion +// --------------------------------------------------------------------------- + +const STOP_WORDS = new Set([ + 'a','an','the','and','or','but','to','of','in','on','at','for','from','with', + 'by','as','is','are','was','were','be','been','being','do','does','did','can', + 'could','should','would','will','shall','may','might','must','this','that', + 'these','those','it','its','into','onto','over','under','about','through', + 'some','any','all','no','not','we','you','i','me','my','our','their','there', + 'here','up','down','out','if','then','than','so','very','just','also','too', + 'work','task','feature','feat','fix','bug','refactor','improve','add','remove', + 'rework','update','change','changes','new','old', +]); + +function tokenize(text) { + return (text || '') + .toLowerCase() + .split(/[^a-z0-9]+/) + .filter(t => t.length >= 2 && !STOP_WORDS.has(t)); +} + +function scorePackage(descTokens, descLower, pkg) { + const nameLower = pkg.name.toLowerCase(); + const pathLower = pkg.path.toLowerCase(); + const fullLower = (pkg.fullName || '').toLowerCase(); + + let score = 0; + if (descLower.includes(nameLower) && nameLower.length >= 3) score += 4; + if (descLower.includes(pathLower)) score += 3; + if (fullLower && descLower.includes(fullLower)) score += 3; + + const nameTokens = new Set([ + ...nameLower.split(/[-_/]+/).filter(Boolean), + ...fullLower.split(/[-_/@]+/).filter(Boolean), + ]); + + for (const t of descTokens) { + if (nameTokens.has(t)) score += 2; + else if (t.length >= 4 && (nameLower.includes(t) || pathLower.includes(t))) score += 1; + } + return score; +} + +export function suggestPackagesFromDescription(description, packages, { max } = {}) { + if (!Array.isArray(packages) || packages.length === 0) return []; + const descLower = (description || '').toLowerCase(); + const descTokens = tokenize(description); + if (descTokens.length === 0) return []; + + const scored = packages + .map(p => ({ pkg: p, score: scorePackage(descTokens, descLower, p) })) + .filter(s => s.score > 0) + .sort((a, b) => b.score - a.score); + + const cap = Math.max(1, Math.min(max ?? Math.ceil(packages.length / 2), scored.length)); + return scored.slice(0, cap).map(s => s.pkg); +} + +// --------------------------------------------------------------------------- +// Glob drafting + manifest composition +// --------------------------------------------------------------------------- + +const DEFAULT_DENY = ['!**/secrets.*', '!**/.env*']; +const DEFAULT_BUILD_EXEMPTIONS = ['**/dist/**', '**/*.tsbuildinfo', 'pnpm-lock.yaml']; + +export function draftGlobs(selectedPackages, opts = {}) { + const { + includeBuildArtifacts = true, + extraAllowed = [], + extraDeny = [], + } = opts; + + const allowed = []; + const seenAllowed = new Set(); + const push = (p) => { + if (typeof p !== 'string') return; + const trimmed = p.trim(); + if (!trimmed || seenAllowed.has(trimmed)) return; + seenAllowed.add(trimmed); + allowed.push(trimmed); + }; + + for (const pkg of selectedPackages || []) { + const path = typeof pkg === 'string' ? pkg : pkg.path; + if (!path) continue; + push(`${path.replace(/\/+$/, '')}/**`); + } + for (const p of extraAllowed) push(p); + for (const p of extraDeny) { + const withBang = p.startsWith('!') ? p : `!${p}`; + push(withBang); + } + for (const d of DEFAULT_DENY) push(d); + + const exemptions = includeBuildArtifacts ? [...DEFAULT_BUILD_EXEMPTIONS] : []; + + return { allowed, exemptions }; +} + +export function buildManifest({ + id, + description, + selectedPackages, + extraAllowed = [], + extraDeny = [], + includeBuildArtifacts = true, + inheritBase = true, + existingIds = [], + now = () => new Date().toISOString(), +}) { + const finalId = id && /^[a-z0-9][a-z0-9-_.]{0,63}$/.test(id) + ? id + : deriveTaskId(description, { existingIds }); + + const { allowed, exemptions } = draftGlobs(selectedPackages, { + includeBuildArtifacts, + extraAllowed, + extraDeny, + }); + + const manifest = { + id: finalId, + description: description ? description.trim() : undefined, + created: now(), + inherits: inheritBase ? ['base'] : undefined, + allowed: allowed.length ? allowed : undefined, + exemptions: exemptions.length ? exemptions : undefined, + }; + return Object.fromEntries(Object.entries(manifest).filter(([, v]) => v !== undefined)); +} diff --git a/agent-scope/lib/wizard.test.mjs b/agent-scope/lib/wizard.test.mjs new file mode 100644 index 000000000..945a1578b --- /dev/null +++ b/agent-scope/lib/wizard.test.mjs @@ -0,0 +1,262 @@ +// Unit tests for the wizard pure-logic. +// node --test agent-scope/lib/wizard.test.mjs + +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { + mkdtempSync, mkdirSync, writeFileSync, rmSync, +} from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { + discoverPackages, + deriveTaskId, + suggestPackagesFromDescription, + draftGlobs, + buildManifest, +} from './wizard.mjs'; + +function makeRepo() { + const root = mkdtempSync(join(tmpdir(), 'agent-scope-wizard-')); + mkdirSync(join(root, 'agent-scope/tasks'), { recursive: true }); + return root; +} + +function writePkg(root, relPath, name) { + const full = join(root, relPath); + mkdirSync(full, { recursive: true }); + writeFileSync(join(full, 'package.json'), JSON.stringify({ name }, null, 2)); +} + +// --- discoverPackages ----------------------------------------------------- + +test('discoverPackages: pnpm-workspace.yaml with packages/*', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'pnpm-workspace.yaml'), + 'packages:\n - "packages/*"\n - "demo"\n'); + writePkg(root, 'packages/agent', '@x/agent'); + writePkg(root, 'packages/core', '@x/core'); + writePkg(root, 'demo', '@x/demo'); + // A directory without package.json should be skipped. + mkdirSync(join(root, 'packages/no-pkg')); + + const pkgs = discoverPackages(root); + const names = pkgs.map(p => p.name).sort(); + assert.deepEqual(names, ['agent', 'core', 'demo']); + assert.ok(pkgs.every(p => typeof p.path === 'string' && p.path.length > 0)); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('discoverPackages: falls back to package.json workspaces', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'package.json'), + JSON.stringify({ name: 'root', workspaces: ['libs/*'] })); + writePkg(root, 'libs/alpha', 'alpha'); + writePkg(root, 'libs/beta', 'beta'); + + const pkgs = discoverPackages(root); + assert.deepEqual(pkgs.map(p => p.name).sort(), ['alpha', 'beta']); + assert.ok(pkgs.every(p => p.path.startsWith('libs/'))); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('discoverPackages: falls back to packages/* scan when nothing declared', () => { + const root = makeRepo(); + try { + writePkg(root, 'packages/lone', 'lone'); + const pkgs = discoverPackages(root); + assert.deepEqual(pkgs.map(p => p.name), ['lone']); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('discoverPackages: empty repo → empty', () => { + const root = makeRepo(); + try { + assert.deepEqual(discoverPackages(root), []); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('discoverPackages: ignores dotfile subdirs when expanding globs', () => { + const root = makeRepo(); + try { + writeFileSync(join(root, 'pnpm-workspace.yaml'), 'packages:\n - "packages/*"\n'); + writePkg(root, 'packages/real', 'real'); + mkdirSync(join(root, 'packages/.hidden'), { recursive: true }); + const pkgs = discoverPackages(root); + assert.deepEqual(pkgs.map(p => p.name), ['real']); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +// --- deriveTaskId --------------------------------------------------------- + +test('deriveTaskId: kebab-cases a description', () => { + assert.equal(deriveTaskId('Refactor Peer Sync'), 'refactor-peer-sync'); +}); + +test('deriveTaskId: strips leading/trailing dashes', () => { + assert.equal(deriveTaskId(' -- hello world -- '), 'hello-world'); +}); + +test('deriveTaskId: truncates to 48 chars', () => { + const long = 'a'.repeat(100); + const id = deriveTaskId(long); + assert.ok(id.length <= 48, `got ${id.length}`); + assert.match(id, /^a+$/); +}); + +test('deriveTaskId: empty → task-', () => { + const id = deriveTaskId(''); + assert.match(id, /^task-\d{8,14}$/); +}); + +test('deriveTaskId: colon-only → task-', () => { + const id = deriveTaskId('!!!'); + assert.match(id, /^task-\d{8,14}$/); +}); + +test('deriveTaskId: collision → appends -2, -3', () => { + const existing = ['fix-auth', 'fix-auth-2']; + const id = deriveTaskId('fix auth', { existingIds: existing }); + assert.equal(id, 'fix-auth-3'); +}); + +// --- suggestPackagesFromDescription --------------------------------------- + +const SAMPLE_PKGS = [ + { path: 'packages/agent', name: 'agent' }, + { path: 'packages/core', name: 'core' }, + { path: 'packages/publisher', name: 'publisher' }, + { path: 'packages/storage', name: 'storage' }, + { path: 'packages/evm-module',name: 'evm-module' }, + { path: 'packages/cli', name: 'cli' }, +]; + +test('suggestPackages: exact name match wins', () => { + const s = suggestPackagesFromDescription('refactor peer sync in agent and core', SAMPLE_PKGS); + const names = s.map(p => p.name); + assert.ok(names.includes('agent'), names.join(',')); + assert.ok(names.includes('core'), names.join(',')); +}); + +test('suggestPackages: token inside compound name (evm)', () => { + const s = suggestPackagesFromDescription('improve evm deployment', SAMPLE_PKGS); + assert.ok(s.some(p => p.name === 'evm-module'), s.map(x => x.name).join(',')); +}); + +test('suggestPackages: empty description → empty', () => { + assert.deepEqual(suggestPackagesFromDescription('', SAMPLE_PKGS), []); +}); + +test('suggestPackages: no match → empty', () => { + const s = suggestPackagesFromDescription('write unrelated documentation for readme', SAMPLE_PKGS); + assert.equal(s.length, 0); +}); + +test('suggestPackages: ignores 1-char / stopword tokens', () => { + // 'a' 'to' 'the' would otherwise match 'agent', 'storage', 'publisher' + const s = suggestPackagesFromDescription('a to the', SAMPLE_PKGS); + assert.equal(s.length, 0); +}); + +test('suggestPackages: caps at ceil(n/2) by default', () => { + const s = suggestPackagesFromDescription( + 'agent core publisher storage evm cli', + SAMPLE_PKGS, + ); + assert.ok(s.length <= Math.ceil(SAMPLE_PKGS.length / 2), + `suggestions: ${s.map(p => p.name).join(',')}`); +}); + +// --- draftGlobs ----------------------------------------------------------- + +test('draftGlobs: one package → one allowed entry plus deny negations', () => { + const { allowed, exemptions } = draftGlobs( + [{ path: 'packages/agent', name: 'agent' }], + { includeBuildArtifacts: false }, + ); + assert.deepEqual(allowed, [ + 'packages/agent/**', + '!**/secrets.*', + '!**/.env*', + ]); + assert.deepEqual(exemptions, []); +}); + +test('draftGlobs: multiple packages + build exemptions', () => { + const { allowed, exemptions } = draftGlobs( + [{ path: 'packages/agent' }, { path: 'packages/core' }], + { includeBuildArtifacts: true }, + ); + assert.ok(allowed.includes('packages/agent/**')); + assert.ok(allowed.includes('packages/core/**')); + assert.ok(allowed.includes('!**/secrets.*')); + assert.deepEqual(exemptions, ['**/dist/**', '**/*.tsbuildinfo', 'pnpm-lock.yaml']); +}); + +test('draftGlobs: extraAllowed + extraDeny', () => { + const { allowed } = draftGlobs([], { + includeBuildArtifacts: false, + extraAllowed: ['scripts/my-tool.ts'], + extraDeny: ['config/**', '!already/!prefixed.ts'], + }); + assert.ok(allowed.includes('scripts/my-tool.ts')); + assert.ok(allowed.includes('!config/**')); + assert.ok(allowed.includes('!already/!prefixed.ts')); +}); + +test('draftGlobs: deduplicates identical entries', () => { + const { allowed } = draftGlobs( + [{ path: 'packages/agent' }, { path: 'packages/agent/' }], + { includeBuildArtifacts: false }, + ); + assert.equal(allowed.filter(a => a === 'packages/agent/**').length, 1); +}); + +test('draftGlobs: accepts raw path strings as well as {path} objects', () => { + const { allowed } = draftGlobs( + ['packages/mixed', { path: 'packages/object' }], + { includeBuildArtifacts: false }, + ); + assert.ok(allowed.includes('packages/mixed/**')); + assert.ok(allowed.includes('packages/object/**')); +}); + +// --- buildManifest -------------------------------------------------------- + +test('buildManifest: composes a valid-looking manifest', () => { + const m = buildManifest({ + id: 'my-task', + description: 'Refactor sync', + selectedPackages: [{ path: 'packages/agent' }], + includeBuildArtifacts: true, + inheritBase: true, + now: () => '2026-01-01T00:00:00.000Z', + }); + assert.equal(m.id, 'my-task'); + assert.equal(m.description, 'Refactor sync'); + assert.equal(m.created, '2026-01-01T00:00:00.000Z'); + assert.deepEqual(m.inherits, ['base']); + assert.ok(m.allowed.includes('packages/agent/**')); + assert.ok(m.exemptions.includes('**/dist/**')); +}); + +test('buildManifest: invalid id → derives from description', () => { + const m = buildManifest({ + id: '---bad---', + description: 'Fix staking flow', + selectedPackages: [{ path: 'packages/chain' }], + }); + assert.match(m.id, /^fix-staking-flow/); +}); + +test('buildManifest: no inheritBase → no inherits field', () => { + const m = buildManifest({ + id: 'isolated', + description: 'd', + selectedPackages: [{ path: 'packages/x' }], + inheritBase: false, + }); + assert.equal(m.inherits, undefined); +}); diff --git a/package.json b/package.json index 592fb6ddc..904783c43 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ "test:game:e2e": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:e2e", "test:all": "pnpm test && pnpm test:evm && pnpm test:game:ui", "task": "node agent-scope/bin/task.mjs", - "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs agent-scope/lib/check-agent.test.mjs", + "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs agent-scope/lib/check-agent.test.mjs agent-scope/lib/wizard.test.mjs", "scope:validate": "node agent-scope/bin/task.mjs validate", "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show", "scope:check-agent": "node agent-scope/bin/task.mjs check-agent" From d30b79841d54b4180975b1690ab4ddcc6b74a49e Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 11:46:00 +0200 Subject: [PATCH 07/21] =?UTF-8?q?agent-scope:=20replace=20--chat=20with=20?= =?UTF-8?q?--smart=20=E2=80=94=20AI-driven=20onboarding=20with=20descripti?= =?UTF-8?q?on=20capture?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --smart captures a multi-line task description in the CLI, embeds it in the one-shot marker, and hands off to the agent. The agent reads the description verbatim (no re-asking), explores the repo, and proposes a scope via a rich two-part AskQuestion (multi-select packages + single-select action). - onboarding.mjs: add buildOnboardingTrigger({description}) and extractDescription(); keep ONBOARDING_TRIGGER_TEXT as a backcompat alias. New trigger text embeds the description in a fenced block and describes the two-question AskQuestion protocol the agent must use. - onboarding.test.mjs: +8 tests for buildOnboardingTrigger / extractDescription round-trip, multi-line preservation, whitespace trimming, empty-description fallback, and malformed-marker tolerance. - bin/task.mjs: rewire start() — add --smart mode with multi-line description capture (blank-line-terminated, safe against closed stdin), keep --chat as a deprecated alias with a warning, remove the old TTY auto-fallback and error out cleanly with 'use pnpm task create' guidance instead. Default pnpm task start still runs the interactive wizard. - Rules + docs (.cursor/rules, CLAUDE.md, AGENTS.md, GEMINI.md, .cursorrules, README.md): full rewrite of the onboarding protocol — agents now must check for the description block and use the two-part AskQuestion layout with a package picker and action picker. Tests: 183 green in ~0.8s. Schemas + base manifests still validate. Made-with: Cursor --- .cursor/rules/agent-scope.mdc | 141 +++++++++++++++--------- .cursorrules | 10 +- AGENTS.md | 68 +++++++----- CLAUDE.md | 85 +++++++++----- GEMINI.md | 8 +- agent-scope/README.md | 78 +++++++------ agent-scope/bin/task.mjs | 137 +++++++++++++++++------ agent-scope/lib/onboarding.mjs | 165 +++++++++++++++++++++------- agent-scope/lib/onboarding.test.mjs | 60 +++++++++- 9 files changed, 525 insertions(+), 227 deletions(-) diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index b86203e07..4f44fc01a 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -1,5 +1,5 @@ --- -description: Task-scoped write permissions. Invisible by default; engaged via `pnpm task start` (interactive wizard, preferred) or `pnpm task start --chat` (agent-guided) or an active task. All denials surface a plan-mode AskQuestion menu. +description: Task-scoped write permissions. Invisible by default; engaged via `pnpm task start` (interactive wizard, preferred) or `pnpm task start --smart` (agent-guided with pre-captured description) or an active task. All denials surface a plan-mode AskQuestion menu. alwaysApply: true --- @@ -18,11 +18,12 @@ The user turns the system on in one of three ways: dropped and you see NOTHING in chat. By the time they message you, the session-start hook has already injected the active-task context block — treat it like case 3 below. -2. **Agent-guided (`pnpm task start --chat`)** — they explicitly want YOU - to do the onboarding. This drops a one-shot marker file - (`agent-scope/.pending-onboarding`, gitignored) and copies the trigger - text to their clipboard. THREE parallel consumers compete for the marker - — whichever runs first wins, because consumption is atomic read-and-delete: +2. **Smart onboarding (`pnpm task start --smart`)** — they want YOU to read + their task description, explore the repo, and propose a scope. The CLI + captures a multi-line description from them and drops a one-shot marker + (`agent-scope/.pending-onboarding`, gitignored) that already embeds the + description. THREE parallel consumers compete for the marker — whichever + runs first wins, because consumption is atomic read-and-delete: - `sessionStart` hook — injects the trigger as initial context in any NEW chat. - `postToolUse` hook — injects it as `additional_context` after ANY @@ -65,58 +66,88 @@ postToolUse hooks), treat it the same as finding the marker: pivot to the onboarding protocol immediately. The hooks already delete the marker for you in that case; do not double-delete — just follow the protocol. -## Task onboarding protocol — when the user wants to start a new scoped task +## Smart onboarding protocol — when the user wants to start a new scoped task Triggered by any of: -- The trigger line from `pnpm task start --chat` (paste from the user's - terminal) or the pending-onboarding marker being consumed by a hook / by - your top-of-turn check above. -- The user typing "start a scoped task", "scope me", "agent-scope: start", or - similar intent. +- The smart-mode trigger text (either found in the pending-onboarding + marker, or injected by the `sessionStart` / `postToolUse` hook). It + begins with `agent-scope: start task onboarding.` and usually contains a + `=== USER TASK DESCRIPTION ===` block. +- The user typing "start a scoped task", "scope me", "agent-scope: start", + or similar intent without a description (in which case you'll need to + ask them to describe it first — see step 1). Follow these steps exactly: -1. **Ask for the task description.** Send a plain chat message: - > "OK, let's scope a new task. Describe in detail what we're building or - > fixing — the more specific, the better the scope I can propose. Which - > packages, which behaviors, which tests, any files you already know about?" - -2. **Wait for the user's reply.** Do not explore yet. - -3. **Explore the codebase.** Use the DKG SPARQL queries in the top of this - file, plus `Glob`, `Grep`, and `Read`, to find: +1. **Get the task description.** + - **If the trigger already contains a `=== USER TASK DESCRIPTION ===` block** + (the `--smart` flow): use that description verbatim as your brief. + DO NOT re-ask the user to describe the task — they already typed it + into the CLI. + - **Otherwise** (bare marker or an intent phrase), send a plain chat + message: + > "OK, let's scope a new task. Describe in detail what we're building + > or fixing — packages, behaviours, tests, any files you already know + > about. The more detail, the tighter the scope I can propose." + + Wait for the reply. + +2. **Explore the codebase.** Use `Glob`, `Grep`, `SemanticSearch`, `Read`, + and the DKG SPARQL queries at the top of this file to locate: - The package(s) the task touches - - The specific source files implementing the mentioned behavior + - Specific source files implementing the mentioned behaviour - Sibling test files that cover them - Related schemas / contracts / migrations if the task mentions them + - Count matching files per package so you can show weights in Q1. -4. **Draft a scope.** Aim for a set of globs that: +3. **Draft a scope.** Aim for a set of globs that: - Covers everything you reasonably expect to touch - Errs slightly broad (under-scoping causes constant denials mid-work; - over-scoping just gives you rope the user can see via `pnpm task show`) + over-scoping just gives the user rope they can see via `pnpm task show`) + - Prefers whole-package globs (`packages//**`) over file globs + when in doubt - Includes matching test files - Always appends `!**/secrets.*`, `!**/.env*` as safety denies - Inherits `base` unless there is a reason not to (base supplies the - standard exemptions: `**/dist/**`, lockfiles, build artifacts) - -5. **Propose the scope via `AskQuestion`.** The prompt **must** include: - - A one-sentence rephrasing of the user's task description - - The list of allowed globs you're proposing, bullet-pointed - - Your recommendation ("I recommend `approve`") - - The suggested task id (kebab-case, short) - - Options: - - `approve` — "Create the manifest with these globs and activate it." - - `show_globs` — "Let me see the full manifest JSON first." - - `edit` — "I want to tweak the globs before approving." - - `cancel` — "Don't start a task; I'll work unscoped." - - `custom_instruction` — "Let me type my own instruction." - -6. **On `approve`:** print a code block with the **exact** command for the user - to run in their own terminal. The user must run it (not you) because the - `afterShellExecution` hook would delete an untracked file in - `agent-scope/tasks/` if you ran it yourself: + standard exemptions: `**/dist/**`, lockfiles, build artefacts) + +4. **Propose the scope via a SINGLE `AskQuestion` call with TWO questions.** + + **Q1 — package picker (multi-select).** + - `id`: `packages` + - `allow_multiple`: `true` + - `prompt`: `"Which packages should be writable for this task?"` — + directly above the list, include a one-sentence rephrasing of the + user's description and the proposed task id. + - `options`: one entry per candidate package, labelled + `" files match"` with a sample of 2-3 relevant file + paths in the label where it helps. Pre-check the packages you've + already decided to include by listing them first and describing them + as the recommended selection in the prompt. + - Include a final option `other` labelled + `"None of the above / I'll specify files manually"` so the user can + escape the picker. + + **Q2 — action (single-select).** + - `id`: `action` + - `allow_multiple`: `false` + - `prompt`: `"Action?"` + - `options` (IDs must match exactly): + - `approve` — `"Create + activate this scope"` + - `show_json` — `"Show the full manifest JSON first"` + - `edit_globs` — `"Let me hand-edit the allowed/deny globs"` + - `widen` — `"Let me add another package/file"` + - `narrow` — `"Let me remove a package/file"` + - `cancel` — `"Abort, no task"` + - `custom_instruction` — `"Let me type my own instruction"` + - Your recommendation should appear inline in the prompt + ("I recommend `approve`"). + +5. **On `approve` + the Q1 package selection:** print a fenced bash block + with the **exact** command for the user to run in their own terminal. + The user must run it (not you) because the `afterShellExecution` hook + would delete an untracked file in `agent-scope/tasks/` if you did. ```bash pnpm task create \ @@ -128,22 +159,24 @@ Follow these steps exactly: --activate ``` - Then say "Let me know when you've run that and I'll start." Wait for the - user to confirm with any short acknowledgement ("done", "go", "ok"). + Then say "Let me know when you've run that and I'll start." Wait for + the user to confirm with any short acknowledgement ("done", "go", "ok"). -7. **On `show_globs`:** print the JSON you're about to propose, then re-ask - `AskQuestion` with the same options. +6. **On `show_json`:** print the drafted manifest JSON, then re-ask the + same two questions. -8. **On `edit`:** ask the user which globs to change, then loop back to step 5. +7. **On `edit_globs` / `widen` / `narrow`:** ask one targeted follow-up in + chat ("Which globs would you like to change / add / remove?"), apply + the user's answer to the draft, then re-ask the same two questions. -9. **On `cancel`:** acknowledge and continue working without a task (system - stays invisible). +8. **On `cancel`:** acknowledge and continue working without a task (the + system stays invisible). -10. **On `custom_instruction`:** ask the user in plain chat what they want - instead, then do that. +9. **On `custom_instruction`:** ask the user in plain chat what they want + instead, then do that. -From step 6 onward, the active task is set and the plan-mode denial protocol -(below) governs every future write. +From step 5 onward, the active task is set and the plan-mode denial +protocol (below) governs every future write. ## Plan-mode denial protocol — MANDATORY once a task is active diff --git a/.cursorrules b/.cursorrules index 20f4b750a..7b1ae5324 100644 --- a/.cursorrules +++ b/.cursorrules @@ -23,10 +23,12 @@ Quick summary: - The user's default onboarding is `pnpm task start` — an interactive CLI wizard that writes the manifest itself. You see nothing; an active task may just be set when you start talking. -- If the user runs `pnpm task start --chat`, a one-shot marker at - `agent-scope/.pending-onboarding` is dropped. On your first action of any - turn with no active task, check the marker — if present, delete it and - pivot to the onboarding protocol in `AGENTS.md`. +- If the user runs `pnpm task start --smart`, a one-shot marker at + `agent-scope/.pending-onboarding` is dropped. The marker already + embeds the user's task description in a `=== USER TASK DESCRIPTION ===` + block — do NOT ask them to describe it again. On your first action of + any turn with no active task, check the marker — if present, delete it + and pivot to the smart-onboarding protocol in `AGENTS.md`. - Never invent menu options when surfacing a denial; reuse the JSON `options` array verbatim and add your reasoning + recommendation. - Hook-supporting agents (Cursor, Claude Code) physically block out-of-scope diff --git a/AGENTS.md b/AGENTS.md index 74d201ba4..be126130c 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,9 +19,11 @@ through `.cursor/rules/agent-scope.mdc` and `CLAUDE.md`. The guard is **invisible by default**. It only activates when: -1. The user runs `pnpm task start --chat` and the trigger line +1. The user runs `pnpm task start --smart` and the trigger line `agent-scope: start task onboarding.` reaches you (via a hook or via - your own top-of-turn marker check). The default `pnpm task start` + your own top-of-turn marker check). The marker already embeds the + user's task description in a `=== USER TASK DESCRIPTION ===` block — + do NOT ask the user to describe it again. The default `pnpm task start` without `--chat` is an interactive CLI wizard that writes a manifest itself and never reaches you, so most users will skip this flow, OR 2. An active task is set (`agent-scope/active` exists; the session-start @@ -53,14 +55,16 @@ terminal: touch agent-scope/.bootstrap-token ``` -## Task onboarding (when the user runs `pnpm task start --chat`) +## Smart onboarding (when the user runs `pnpm task start --smart`) -`pnpm task start --chat` drops a one-shot marker file at -`agent-scope/.pending-onboarding` containing trigger text. (The default -`pnpm task start` without `--chat` is an interactive CLI wizard that never -involves you — by the time the user messages you, the manifest is already -written and activated.) The marker is -consumed atomically the first time anything reads it. +`pnpm task start --smart` captures a multi-line task description from the +user in the terminal, then drops a one-shot marker file at +`agent-scope/.pending-onboarding` containing trigger text *and* the +user's description embedded in a `=== USER TASK DESCRIPTION ===` block. +(The default `pnpm task start` without `--smart` is an interactive CLI +wizard that never involves you — by the time the user messages you, the +manifest is already written and activated.) The marker is consumed +atomically the first time anything reads it. For Codex CLI and other agents without hook support, you should **proactively check for this marker on the first action of every turn** when no task is @@ -74,23 +78,33 @@ active: ### Onboarding protocol -1. Acknowledge in plain chat. Ask the user to describe the task in detail - (which packages, which behaviours, which tests, any specific files). -2. Wait for the description. -3. Explore the codebase to find the files the task will touch. Use - whatever exploration tools you have (file listing, grep, the DKG MCP - server if available). -4. Draft a conservative set of allowed globs. Inherit from `base`. Always - append `!**/secrets.*` and `!**/.env*`. -5. Propose the scope to the user (a one-line task summary, the proposed - globs, your recommendation). Ask whether to: - - approve - - show full globs - - edit the globs - - cancel - - give a custom instruction -6. On approve: print the **exact** command for the user to run in their - terminal (do NOT run it yourself — see the warning below): +1. **Get the task description.** + - If the marker contains a `=== USER TASK DESCRIPTION ===` block + (the `--smart` flow), use that verbatim as the brief. DO NOT ask + the user to describe the task again. + - Otherwise, ask them in chat: "Describe the task in detail — + packages, behaviours, tests, any files you already know about." + Wait for reply. +2. Explore the codebase to find the files the task will touch. Use + whatever exploration tools you have (file listing, grep, semantic + search, the DKG MCP server if available). Count matching files per + candidate package. +3. Draft a conservative set of allowed globs. Prefer whole-package + globs (`packages//**`). Inherit from `base`. Always append + `!**/secrets.*` and `!**/.env*`. +4. Propose the scope to the user as a **two-part question** (use + whatever multi-select + single-select UI your client supports): + - **Q1 (multi-select, "Which packages should be writable?"):** one + option per candidate package labelled + ` files match`, with 2-3 sample paths inline where + helpful. Pre-select the packages you already decided to include. + Include a `None of the above / I'll specify manually` escape + option. + - **Q2 (single-select, "Action?"):** `approve`, `show_json`, + `edit_globs`, `widen`, `narrow`, `cancel`, `custom_instruction`. +5. On `approve` + the Q1 package selection: print the **exact** command + for the user to run in their terminal (do NOT run it yourself — + see the warning below): ```bash pnpm task create \ @@ -139,7 +153,7 @@ an option that wasn't listed. ``` pnpm task start # interactive wizard (default, preferred) -pnpm task start --chat # legacy: hand off onboarding to the agent +pnpm task start --smart # user pastes description; agent proposes scope pnpm task list # list available task manifests pnpm task show # show the active task and its scope pnpm task set # set the active task diff --git a/CLAUDE.md b/CLAUDE.md index 75efb212e..81a93a047 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -193,23 +193,27 @@ kicks in when: 1. The user runs `pnpm task start` (interactive wizard — most common; by the time they message you, an active task is already set and the session-start hook has injected the context block), OR -2. The user runs `pnpm task start --chat` and the trigger line +2. The user runs `pnpm task start --smart` and the trigger line `agent-scope: start task onboarding` appears in your context (marker - consumed by a hook or by your top-of-turn check), OR + consumed by a hook or by your top-of-turn check). The marker already + embeds the user's task description — do NOT ask them to describe it + again, OR 3. An active task is set (session-start hook injects a context block naming it), OR 4. You try to touch one of the hardcoded protected paths that defend the guard itself — those are always blocked unless the user has enabled bootstrap mode. -### Task onboarding protocol (triggered by `pnpm task start --chat` / "start a scoped task") +### Smart onboarding protocol (triggered by `pnpm task start --smart` / "start a scoped task") -`pnpm task start --chat` drops `agent-scope/.pending-onboarding` (gitignored) -and copies the trigger to the user's clipboard. (The default -`pnpm task start` without `--chat` is the interactive CLI wizard — it -writes the manifest itself and never reaches you.) The marker is one-shot: -the first thing that consumes it also deletes it, so it fires for **exactly -one** user message no matter which chat that message lands in. +`pnpm task start --smart` asks the user for a multi-line task description +in the terminal, then drops `agent-scope/.pending-onboarding` (gitignored) +with that description already embedded, and copies the trigger to the +user's clipboard. (The default `pnpm task start` without `--smart` is the +interactive CLI wizard — it writes the manifest itself and never reaches +you.) The marker is one-shot: the first thing that consumes it also +deletes it, so it fires for **exactly one** user message no matter which +chat that message lands in. Three parallel consumers — whichever runs first wins: @@ -234,9 +238,10 @@ active task** (`agent-scope/active` is absent), your FIRST action MUST be: b. IGNORE the user's actual message (whatever they typed, even if it was a specific instruction — they knew onboarding was pending when they sent it). - c. Pivot to the **Task onboarding protocol** below: acknowledge that - onboarding was queued via `pnpm task start --chat`, and immediately - ask the user to describe the task in detail. + c. Pivot to the **Smart onboarding protocol** below: parse the marker + payload for a `=== USER TASK DESCRIPTION ===` block. If present, use + it as the brief and skip step 1 below. Otherwise, ask the user to + describe the task. Skip this check only when `agent-scope/active` exists (a task is already set — no onboarding needed) or when this is a multi-turn conversation @@ -247,20 +252,38 @@ When you receive additional_context / initial context beginning with like the marker was present: STOP your current plan and run the onboarding protocol below. -1. Acknowledge in plain chat and ask the user to describe the task in detail - (which packages, which behaviours, which tests, any specific files). -2. Wait for the description. -3. Explore the codebase with DKG queries + `Glob` + `Grep` + `Read` to find - the files the task will touch. -4. Draft a set of globs that covers those files plus their tests. Err - slightly broad; inherit `base`; always append `!**/secrets.*`, +1. **Get the task description.** + - If the trigger / marker contains a `=== USER TASK DESCRIPTION ===` + block (the `--smart` flow), use that verbatim. DO NOT ask the user to + describe the task again — they already typed it into the CLI. + - Otherwise, ask them in plain chat: "OK, let's scope a new task. + Describe in detail what we're building or fixing — packages, + behaviours, tests, any files you already know about." Wait for + reply. +2. **Explore the codebase** with `Glob`, `Grep`, `SemanticSearch`, `Read`, + and the DKG SPARQL queries to find the files the task will touch. + Count matching files per candidate package. +3. **Draft a set of globs** that covers those files plus their tests. Err + slightly broad; prefer whole-package globs (`packages//**`) over + file-level globs; inherit `base`; always append `!**/secrets.*`, `!**/.env*`. -5. Propose the scope via `AskQuestion`. The prompt must include a one-line - rephrase of the task, the list of proposed globs, and your recommendation. - Options: `approve`, `show_globs`, `edit`, `cancel`, `custom_instruction`. -6. On `approve`, print a fenced bash block with the **exact** command for the - user to run in their terminal (not you — the `afterShellExecution` hook - would delete a new manifest file you created yourself): +4. **Propose the scope via a SINGLE `AskQuestion` call with TWO questions.** + - **Q1 — packages (multi-select).** `id: "packages"`, + `allow_multiple: true`, `prompt: "Which packages should be writable + for this task?"` Include a one-line rephrase of the description and + the suggested task id in the prompt. Options: one per candidate + package labelled `" files match"`, with a sample of + relevant paths in the label where helpful. List the recommended + packages first and say so in the prompt. + - **Q2 — action (single-select).** `id: "action"`, + `allow_multiple: false`, `prompt: "Action?"`. Options (IDs must + match exactly): `approve`, `show_json`, `edit_globs`, `widen`, + `narrow`, `cancel`, `custom_instruction`. Recommend `approve` in the + prompt. +5. **On `approve`** + the Q1 package selection, print a fenced bash block + with the **exact** command for the user to run in their terminal (not + you — the `afterShellExecution` hook would delete a new manifest file + you created yourself): ```bash pnpm task create \ @@ -272,10 +295,12 @@ protocol below. ``` Wait for them to confirm ("done"/"go"), then start the actual work. -7. On `show_globs`, print the manifest JSON, then re-ask with the same options. -8. On `edit`, ask which globs to change, loop back to step 5. -9. On `cancel`, acknowledge and keep working without a task. -10. On `custom_instruction`, ask in plain chat what they want instead. +6. **On `show_json`**, print the drafted manifest, then re-ask both + questions. +7. **On `edit_globs` / `widen` / `narrow`**, ask one targeted follow-up in + chat, update the draft, then re-ask both questions. +8. **On `cancel`**, acknowledge and keep working without a task. +9. **On `custom_instruction`**, ask in plain chat what they want instead. ### Plan-mode denial protocol (runs for every agent-scope denial) @@ -340,7 +365,7 @@ AskQuestion prompt (see below). ``` pnpm task start # interactive wizard (default) — user runs this; writes + activates manifest directly -pnpm task start --chat # legacy: hand off onboarding to you via chat +pnpm task start --smart # user pastes description in CLI; agent proposes scope in chat pnpm task create [flags] # non-interactive manifest build — USER runs this pnpm task list | show | set | clear | check | audit | resolve pnpm scope:status | scope:validate | scope:test diff --git a/GEMINI.md b/GEMINI.md index 2cd01b379..8786404bc 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -15,10 +15,12 @@ Key points for Gemini: - Most users run `pnpm task start` without flags — that's an interactive CLI wizard that writes a manifest directly. You don't see anything special; by the time the user messages you, the active task is set. -- If the user runs `pnpm task start --chat`, a one-shot marker file at - `agent-scope/.pending-onboarding` is dropped. On your first action of +- If the user runs `pnpm task start --smart`, a one-shot marker file at + `agent-scope/.pending-onboarding` is dropped. The marker already + embeds the user's task description in a `=== USER TASK DESCRIPTION ===` + block — do NOT ask them to describe it again. On your first action of any new turn (when no task is active), check whether that marker exists; - if it does, delete it and run the task-onboarding protocol from + if it does, delete it and run the smart-onboarding protocol from `AGENTS.md`. - Gemini CLI does **not** have hard hook enforcement. You self-enforce by following the rules. The user trusts you to comply. diff --git a/agent-scope/README.md b/agent-scope/README.md index 5277d25e2..3106b15d7 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -29,11 +29,14 @@ You engage the system in one of four ways: drafts a manifest, shows a preview, and activates it. No agent round-trip needed; works identically in every agent (Cursor, Claude Code, Codex, Gemini, …) and even with no agent at all. -2. **Agent-guided (`pnpm task start --chat`)** — legacy flow. Drops a - one-shot marker and copies the trigger text to your clipboard. The agent - explores the repo, proposes a scope via `AskQuestion`, and prints the - exact `pnpm task create` command for you to run. Use this when you want - the agent to do the thinking. +2. **Smart onboarding (`pnpm task start --smart`)** — agent-guided flow. + The CLI asks you for a multi-line task description, then drops a + one-shot marker that embeds that description and copies the trigger + text to your clipboard. The agent reads your description, explores + the repo semantically, and proposes a scope via a rich two-part + `AskQuestion` (multi-select packages + single-select action), then + prints the exact `pnpm task create` command for you to run. Use this + when you want the agent to do the thinking. 3. **Explicit** — `pnpm task set ` activates a manifest you already have. 4. **Direct** — `pnpm task create --description "..." --allowed "..." --activate` @@ -160,8 +163,8 @@ pnpm scope:validate # validates every manifest # Interactive wizard (default) — asks a few questions, drafts + activates a manifest pnpm task start -# Agent-guided flow (legacy) — hands off onboarding to the agent via chat -pnpm task start --chat +# Smart onboarding — paste a description in the CLI, agent proposes scope in chat +pnpm task start --smart # Non-interactive manifest creation (flags) pnpm task create my-task \ @@ -199,9 +202,10 @@ pnpm task clear ## Onboarding flow -There are two onboarding flows. The **interactive wizard** is the default; -the **agent-guided flow** (`--chat`) is an alternative when you want the -agent to do the thinking. +There are two onboarding flows. The **interactive wizard** is the default +— fully deterministic, no agent involvement. The **smart flow** +(`--smart`) is an AI-driven alternative when you want the agent to +understand your task description semantically and propose a scope. ### Flow 1 — interactive wizard (default) @@ -229,16 +233,21 @@ Run `pnpm task start`. The CLI walks you through a short questionnaire: No chat round-trip, no agent needed, runs in under a second, works identically in every agent. This is the recommended path. -If `stdin` is not a TTY (CI, piped input), `pnpm task start` auto-falls-back -to the agent-guided flow so nothing hangs. You can also force the legacy -flow with `pnpm task start --chat`. +If `stdin` is not a TTY (CI, piped input), `pnpm task start` errors out +with guidance to use `pnpm task create --flags...` directly. Both +onboarding modes need interactive input. -### Flow 2 — agent-guided (`pnpm task start --chat`) +### Flow 2 — smart onboarding (`pnpm task start --smart`) -Drops a one-shot marker at `agent-scope/.pending-onboarding` (gitignored) -and copies the trigger to your clipboard. Your NEXT message in any chat -(new or existing) makes the agent pivot to onboarding. Three parallel -consumers compete for the marker so it fires exactly once: +The CLI prompts you for a multi-line task description (finish with an +empty line), then drops a one-shot marker at +`agent-scope/.pending-onboarding` (gitignored) that *already embeds* your +description inside a `=== USER TASK DESCRIPTION ===` block. The trigger +text is also copied to your clipboard. + +Your NEXT message in any chat (new or existing) makes the agent pivot to +smart onboarding. Three parallel consumers compete for the marker so it +fires exactly once: - **New chat (Cmd+L)** — the `sessionStart` hook injects the trigger. - **Current chat, any message** — the agent's top-of-turn rule reads the @@ -247,23 +256,30 @@ consumers compete for the marker so it fires exactly once: - **Manual paste** — the trigger is already in your clipboard. The agent then follows a fixed protocol (defined in -`.cursor/rules/agent-scope.mdc` and `CLAUDE.md`): - -1. Asks you to describe what you're building or fixing. -2. Explores the codebase (Grep / Glob / DKG) to find relevant files. -3. Proposes a set of globs via `AskQuestion` in plan-mode style — approve, - show JSON, edit, cancel, or type a custom instruction. -4. On approve, prints the exact `pnpm task create` command for you to run. +`.cursor/rules/agent-scope.mdc`, `CLAUDE.md`, `AGENTS.md`, `GEMINI.md`): + +1. Reads your description from the marker (does NOT ask you again). +2. Explores the codebase (Grep / Glob / SemanticSearch / DKG) to find + relevant files. Counts matching files per candidate package. +3. Proposes a scope via a **two-part `AskQuestion`**: + - **Q1 — packages (multi-select):** pick which packages should be + writable. Each option shows the package path, file-match count, and + (where helpful) 2-3 sample file paths. The packages the agent + already decided to include are pre-selected. + - **Q2 — action (single-select):** `approve`, `show_json`, + `edit_globs`, `widen`, `narrow`, `cancel`, `custom_instruction`. +4. On `approve`, prints the exact `pnpm task create` command for you to + run. 5. You run it in your terminal (not the agent — otherwise the - `afterShellExecution` hook would delete the new manifest as an untracked - file in a protected path). + `afterShellExecution` hook would delete the new manifest as an + untracked file in a protected path). 6. The agent starts the real work. -From here, every attempted write to an out-of-scope file triggers a plan-mode -AskQuestion menu — see **Escalation** below. +From here, every attempted write to an out-of-scope file triggers a +plan-mode AskQuestion menu — see **Escalation** below. -The marker is one-shot: the first hook that consumes it also deletes it, so -the trigger fires exactly once per `pnpm task start`. +The marker is one-shot: the first hook that consumes it also deletes it, +so the trigger fires exactly once per `pnpm task start --smart`. ## Manifest format diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index 951be3da0..7ef46082e 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -14,6 +14,7 @@ import { } from '../lib/scope.mjs'; import { ONBOARDING_TRIGGER_TEXT, + buildOnboardingTrigger, writeOnboardingMarker, copyToClipboard, } from '../lib/onboarding.mjs'; @@ -200,20 +201,25 @@ async function init(id) { // Task onboarding // --------------------------------------------------------------------------- // -// There are two independent ways to start a task: +// Two independent ways to start a task: // // (1) `pnpm task start` — default. Interactive CLI wizard that asks a few -// questions, drafts a manifest, previews it, and saves + activates it. -// No agent round-trip; works identically in every agent (Cursor, -// Claude Code, Codex, Gemini, …) and even with no agent at all. +// questions (description, packages, extras), drafts a manifest, +// previews it, and saves + activates it. No agent round-trip; works +// identically in every agent and with no agent at all. Deterministic +// keyword match. // -// (2) `pnpm task start --chat` — legacy flow. Drops a one-shot marker and -// copies the trigger text to the clipboard so the agent picks up -// onboarding on the user's next message. Use this when you want the -// agent to explore the repo and propose a scope for you. +// (2) `pnpm task start --smart` — agent-guided mode. The CLI prompts +// once for a multi-line task description, then drops a marker that +// embeds that description + the Smart onboarding protocol. The next +// message the user sends in any chat makes the agent read the +// description, explore the repo, and propose a scope via a rich +// AskQuestion (two questions: multi-select packages + action). The +// agent prints a `pnpm task create ...` command for the user to run. // -// Non-interactive shell (no TTY / piped stdin) auto-falls-back to --chat so -// CI / non-interactive harnesses don't hang on a prompt. +// If stdin is not a TTY we refuse — both modes need interactive input. +// For CI / scripts use `pnpm task create --description ... --allowed +// ... --activate` directly. // --------------------------------------------------------------------------- async function start(argv = []) { @@ -226,49 +232,110 @@ async function start(argv = []) { return; } - const chatMode = argv.includes('--chat') || argv.includes('-c'); + // Accept both --smart (canonical) and --chat (old name we're migrating + // away from). If someone still has `--chat` in muscle memory, warn and + // continue — don't make the rename a paper cut. + const smartMode = argv.includes('--smart') || argv.includes('-s'); + const legacyChat = argv.includes('--chat') || argv.includes('-c'); const forceInteractive = argv.includes('--interactive') || argv.includes('-i'); - const ttyOk = Boolean(process.stdin.isTTY); + const ttyOk = Boolean(process.stdin.isTTY) || forceInteractive; - if (chatMode || (!forceInteractive && !ttyOk)) { - return startChat({ reason: chatMode ? 'flag' : 'no-tty' }); + if (legacyChat) { + console.error('warning: --chat was renamed to --smart; proceeding as --smart.'); } + + if (!ttyOk) { + console.error('error: `pnpm task start` requires an interactive terminal.'); + console.error(''); + console.error('For non-interactive / CI use, call `pnpm task create` directly:'); + console.error(' pnpm task create --description "..." \\'); + console.error(' --allowed "packages/foo/**" --inherits base --activate'); + process.exit(2); + } + + if (smartMode || legacyChat) return startSmart(); await startInteractive(); } -function startChat({ reason } = {}) { - const markerPath = writeOnboardingMarker(root); - const clip = copyToClipboard(ONBOARDING_TRIGGER_TEXT); +async function startSmart() { + console.log('agent-scope: smart task scoping'); + console.log(' (the agent will read your description, explore the repo, and propose a scope)'); + console.log(' (tip: `pnpm task start` without --smart runs the deterministic wizard instead)'); + console.log(''); + + const prompter = createPrompter(); + let description = ''; + try { + console.log('Describe the task in detail — what to build or fix, which packages / behaviours'); + console.log('/ tests, and any files you already know about. Multi-line OK.'); + console.log('Finish with an empty line.'); + console.log(''); + description = await readMultilineDescription(prompter); + } finally { + prompter.close(); + } - console.log('agent-scope: task onboarding primed (chat mode).'); - if (reason === 'no-tty') { - console.log('(stdin is not a TTY — falling back to chat mode so nothing hangs.)'); + const trimmed = description.trim(); + if (!trimmed || trimmed.length < 10) { + bail('description is too short — smart mode needs at least a sentence of context'); } + + const trigger = buildOnboardingTrigger({ description: trimmed }); + const markerPath = writeOnboardingMarker(root, trigger); + const clip = copyToClipboard(trigger); + + console.log(''); + console.log(`agent-scope: captured ${trimmed.split(/\s+/).length} words.`); console.log(''); - console.log('Your NEXT message in any chat (new or existing) will pivot the'); - console.log('agent into onboarding. The marker is then consumed, so it only'); - console.log('fires once.'); + console.log('Next step — exchange ONE short message with your agent:'); + console.log(''); + console.log(' 1. Go to your Cursor / Claude Code / Codex / Gemini chat.'); + console.log(' Any chat works — new or existing.'); + console.log(' 2. Send any message ("go", "hi", whatever).'); + console.log(' 3. The agent reads your description, explores the repo, and'); + console.log(' proposes a scope via a plan-mode AskQuestion. One click to'); + console.log(' approve (or edit) the scope.'); + console.log(' 4. Paste the generated `pnpm task create` command back here.'); console.log(''); if (clip.ok) { - console.log(`The trigger is already in your clipboard (via ${clip.method}).`); - console.log('Just send any message in your current chat — or paste (Cmd+V)'); - console.log('and send, for maximum reliability.'); + console.log(`(Trigger also copied to clipboard via ${clip.method} — pasting works too.)`); } else { - console.log(`Clipboard copy unavailable (${clip.reason}). Paste this manually:`); - console.log(''); - for (const line of ONBOARDING_TRIGGER_TEXT.split('\n')) { - console.log(' ' + line); - } + console.log(`(Clipboard copy unavailable: ${clip.reason}. Paste is optional —`); + console.log(` any message will trigger onboarding because of the marker file.)`); } console.log(''); console.log(`Marker file: ${markerPath}`); + console.log('(Auto-deleted the moment the agent reads it; one-shot.)'); console.log(''); - console.log('Prefer to skip the chat round-trip? Run `pnpm task start` without'); - console.log('--chat for the interactive wizard (default), or use'); - console.log('`pnpm task create --description "..." --allowed "" ...` directly.'); + console.log('Change your mind? `rm agent-scope/.pending-onboarding` and run'); + console.log('`pnpm task start` for the deterministic wizard instead.'); bootstrapWarning(); } +async function readMultilineDescription(prompter) { + // Read lines until we see a single empty line AFTER at least one non- + // empty line. Lets the user paste multi-paragraph text (paste usually + // ends with a blank line) or type naturally and hit Enter twice. + // + // Safety rails: cap iterations and stop on consecutive blanks before + // any content (prevents runaway loops when stdin is closed / EOF). + const lines = []; + let seenContent = false; + let blankRun = 0; + for (let i = 0; i < 2000; i++) { + const line = await prompter.ask('> '); + if (!line || !line.trim()) { + if (seenContent) break; + if (++blankRun >= 3) break; + continue; + } + blankRun = 0; + lines.push(line); + seenContent = true; + } + return lines.join('\n'); +} + async function startInteractive() { console.log('agent-scope: interactive task wizard'); console.log(' (no agent needed — hit Ctrl+C any time to cancel, nothing is saved until the final "save" step.)'); @@ -700,7 +767,7 @@ try { 'usage: task [args]', '', ' start interactive wizard: draft a manifest + activate', - ' start --chat legacy flow: hand off onboarding to the agent', + ' start --smart paste a description, agent proposes scope in chat', ' list list available task manifests', ' show show the active task and its scope', ' set set the active task', diff --git a/agent-scope/lib/onboarding.mjs b/agent-scope/lib/onboarding.mjs index 2dd95c757..9b521e3fe 100644 --- a/agent-scope/lib/onboarding.mjs +++ b/agent-scope/lib/onboarding.mjs @@ -1,21 +1,30 @@ -// Onboarding marker + clipboard helpers for `pnpm task start`. +// Onboarding marker + clipboard helpers for `pnpm task start --smart`. // -// When the user runs `pnpm task start`, we do two things: +// The default `pnpm task start` is now a fully interactive CLI wizard that +// never involves the agent. This module powers the `--smart` mode, where +// the user pastes a task description in the CLI and the agent then reads +// that description, explores the repo, and proposes a scope. // -// 1. Drop a one-shot marker file at `agent-scope/.pending-onboarding` -// containing the full trigger text. -// 2. Try to copy the trigger to the OS clipboard. +// Flow: // -// THREE parallel consumers pick up the marker — whichever runs first wins, -// because consume is atomic (read-and-delete). The marker therefore fires -// for exactly ONE user message after `pnpm task start`, no matter which -// chat / session it lands in: +// 1. `pnpm task start --smart` reads a multi-line description from the +// user, then drops a one-shot marker file at +// `agent-scope/.pending-onboarding`. The marker contains both the +// trigger text AND the user's description, so the agent does not need +// to ask the user "describe the task" again. +// 2. The user sends any message in any chat. +// 3. THREE parallel consumers pick up the marker — whichever runs first +// wins, because consume is atomic (read-and-delete): // -// (a) `sessionStart` hook — fires on a brand new Cursor chat. -// (b) `postToolUse` hook — fires after any tool call in an existing chat. -// (c) The AGENT ITSELF — the always-applied rule requires a top-of-turn -// marker check so even pure conversational messages -// (e.g. "hi") consume the marker correctly. +// (a) `sessionStart` hook — fires on a brand new chat. +// (b) `postToolUse` hook — fires after any tool call in an existing +// chat (Cursor + Claude Code). +// (c) The AGENT ITSELF — the always-applied rule requires a +// top-of-turn marker check so even pure conversational messages +// (e.g. "hi") consume the marker correctly. +// +// 4. The agent follows the "Smart onboarding protocol" (CLAUDE.md, +// .cursor/rules/agent-scope.mdc, AGENTS.md, GEMINI.md). // // Zero runtime deps. Pure-ish (spawnSync for clipboard; filesystem for marker). @@ -25,35 +34,107 @@ import { spawnSync } from 'node:child_process'; import { platform } from 'node:os'; export const ONBOARDING_MARKER_REL = 'agent-scope/.pending-onboarding'; +export const DESCRIPTION_OPEN = '=== USER TASK DESCRIPTION (already provided — do NOT ask again) ==='; +export const DESCRIPTION_CLOSE = '=== END DESCRIPTION ==='; + +// Build the marker / trigger payload. If `description` is provided, the +// agent is told the user has already described the task; otherwise the +// agent is told to ask for a description (used for tests + edge cases only +// — in practice the CLI refuses to drop a marker without a description). +// +// Keep the first line stable: hooks and rules key off the prefix +// `agent-scope: start task onboarding.`. +export function buildOnboardingTrigger({ description = '' } = {}) { + const desc = typeof description === 'string' ? description.trim() : ''; + const hasDesc = desc.length > 0; + + const descBlock = hasDesc + ? [ + '', + DESCRIPTION_OPEN, + desc, + DESCRIPTION_CLOSE, + '', + ] + : []; -// The canonical trigger text the agent sees when onboarding is requested. -// Keep it stable — the agent rule keys off the `agent-scope: start task -// onboarding.` prefix. -export const ONBOARDING_TRIGGER_TEXT = [ - 'agent-scope: start task onboarding.', - '', - 'The user just ran `pnpm task start`. Follow the Task onboarding protocol', - 'in CLAUDE.md and .cursor/rules/agent-scope.mdc EXACTLY:', - '', - ' 1. Stop whatever you were about to do on this turn.', - ' 2. Delete `agent-scope/.pending-onboarding` if it still exists.', - ' 3. Ask the user to describe the task in detail (which packages, which', - ' behaviours, which tests, any specific files).', - ' 4. Wait for the description.', - ' 5. Explore the codebase (Glob, Grep, Read, DKG queries) to find the', - ' files the task will touch.', - ' 6. Draft a conservative set of allowed globs (inherit `base`, append', - ' `!**/secrets.*` and `!**/.env*`).', - ' 7. Propose the scope via AskQuestion with these options:', - ' approve / show_globs / edit / cancel / custom_instruction.', - ' 8. On approve: print a fenced bash block with the exact', - ' `pnpm task create ... --activate` command for the user to run.', - ' Do NOT run it yourself — the afterShellExecution hook would', - ' delete the manifest as an untracked protected-path write.', - '', - 'Your onboarding turn starts now. Ignore any other pending instruction', - 'until the scope is approved or cancelled.', -].join('\n'); + return [ + 'agent-scope: start task onboarding.', + '', + hasDesc + ? 'The user ran `pnpm task start --smart` and has already provided their task description below. DO NOT ask them to describe it again — use the description as your brief.' + : 'The user ran `pnpm task start --smart` but did not include a description. Ask them to describe the task in detail before proceeding.', + ...descBlock, + 'Smart onboarding protocol — follow EXACTLY (full text in CLAUDE.md,', + '.cursor/rules/agent-scope.mdc, AGENTS.md, GEMINI.md):', + '', + ' 1. Stop whatever you were about to do on this turn.', + ' 2. Delete `agent-scope/.pending-onboarding` if it still exists.', + hasDesc + ? ' 3. Read the description above. Do not ask the user to describe it.' + : ' 3. Ask the user in plain chat to describe the task in detail; wait for reply.', + ' 4. Explore the codebase — Glob, Grep, Read, SemanticSearch, DKG —', + ' to find the files the task will touch.', + ' 5. Draft a conservative set of allowed globs:', + ' - inherit `base` (standard build-artefact exemptions)', + ' - append `!**/secrets.*` and `!**/.env*` safety denies', + ' - prefer whole-package globs (packages//**) over files', + ' when in doubt — over-scoping is safe, under-scoping causes', + ' constant denials mid-work.', + ' 6. Propose the scope via a SINGLE `AskQuestion` call with TWO questions:', + '', + ' Q1 (allow_multiple = true):', + ' prompt: "Which packages should be writable for this task?"', + ' options: one entry per candidate package, labelled', + ' " files match description keywords"', + ' plus a sample of 2-3 relevant paths where helpful.', + ' pre-check: the packages you already decided to include.', + '', + ' Q2 (single-select):', + ' prompt: "Action?"', + ' options:', + ' - approve — "Create + activate this scope"', + ' - show_json — "Show the full manifest JSON first"', + ' - edit_globs — "Let me hand-edit the allowed/deny globs"', + ' - widen — "Let me add another package/file"', + ' - narrow — "Let me remove a package/file"', + ' - cancel — "Abort, no task"', + ' - custom_instruction — "Let me type my own instruction"', + '', + ' 7. On `approve` (Q2) with the Q1 selection:', + ' Print a fenced bash block with the EXACT `pnpm task create ...', + ' --activate` command. Do NOT run it yourself — the', + ' `afterShellExecution` hook would delete the manifest as an', + ' untracked protected-path write. The user runs it.', + '', + ' 8. On `show_json`: print the drafted manifest, then re-ask step 6.', + ' 9. On `edit_globs` / `widen` / `narrow`: ask one follow-up in chat,', + ' update the draft, then re-ask step 6.', + ' 10. On `cancel`: acknowledge, no task is set, continue unscoped.', + ' 11. On `custom_instruction`: ask in plain chat, then do what the user', + ' says.', + '', + 'Your onboarding turn starts now. Skip any other pending work until the', + 'scope is approved or cancelled.', + ].join('\n'); +} + +// Description-less trigger, kept as an export for backwards compatibility +// (existing hooks inject this text; existing tests assert its shape). New +// code should call `buildOnboardingTrigger({ description })`. +export const ONBOARDING_TRIGGER_TEXT = buildOnboardingTrigger(); + +// Extract the description back out of a marker payload. Returns the +// description string, or '' if the marker had no description block. +// Tolerant of whitespace and trailing noise. +export function extractDescription(payload) { + if (typeof payload !== 'string' || !payload.length) return ''; + const open = payload.indexOf(DESCRIPTION_OPEN); + const close = payload.indexOf(DESCRIPTION_CLOSE); + if (open < 0 || close < 0 || close < open) return ''; + const start = open + DESCRIPTION_OPEN.length; + return payload.slice(start, close).trim(); +} // --------------------------------------------------------------------------- // Marker file lifecycle diff --git a/agent-scope/lib/onboarding.test.mjs b/agent-scope/lib/onboarding.test.mjs index e74d492f8..416963afc 100644 --- a/agent-scope/lib/onboarding.test.mjs +++ b/agent-scope/lib/onboarding.test.mjs @@ -9,6 +9,10 @@ import { tmpdir } from 'node:os'; import { ONBOARDING_MARKER_REL, ONBOARDING_TRIGGER_TEXT, + DESCRIPTION_OPEN, + DESCRIPTION_CLOSE, + buildOnboardingTrigger, + extractDescription, onboardingMarkerPath, writeOnboardingMarker, hasOnboardingMarker, @@ -34,11 +38,65 @@ test('ONBOARDING_TRIGGER_TEXT starts with the canonical prefix and covers the pr ONBOARDING_TRIGGER_TEXT.startsWith('agent-scope: start task onboarding'), 'trigger must begin with the documented prefix', ); - assert.ok(ONBOARDING_TRIGGER_TEXT.includes('Task onboarding protocol')); + assert.ok(ONBOARDING_TRIGGER_TEXT.includes('Smart onboarding protocol')); assert.ok(ONBOARDING_TRIGGER_TEXT.includes('AskQuestion')); assert.ok(ONBOARDING_TRIGGER_TEXT.includes('pnpm task create')); }); +test('buildOnboardingTrigger: without description → description-less trigger', () => { + const t = buildOnboardingTrigger(); + assert.equal(t, ONBOARDING_TRIGGER_TEXT); + assert.ok(!t.includes(DESCRIPTION_OPEN)); +}); + +test('buildOnboardingTrigger: embeds the description in a fenced block', () => { + const desc = 'Refactor peer sync in agent + core packages.'; + const t = buildOnboardingTrigger({ description: desc }); + assert.ok(t.includes(DESCRIPTION_OPEN)); + assert.ok(t.includes(DESCRIPTION_CLOSE)); + assert.ok(t.includes(desc)); + assert.ok(t.includes('DO NOT ask them to describe it again')); +}); + +test('buildOnboardingTrigger: preserves multi-line descriptions verbatim', () => { + const desc = 'line one\nline two\n\nline four'; + const t = buildOnboardingTrigger({ description: desc }); + assert.ok(t.includes(desc)); +}); + +test('buildOnboardingTrigger: trims leading/trailing whitespace on description', () => { + const t = buildOnboardingTrigger({ description: ' hello \n' }); + assert.ok(t.includes('hello')); + assert.ok(!t.includes(' hello'), 'leading spaces should be trimmed'); +}); + +test('buildOnboardingTrigger: empty string description → treated as missing', () => { + const t = buildOnboardingTrigger({ description: ' \n ' }); + assert.equal(t, ONBOARDING_TRIGGER_TEXT); +}); + +test('extractDescription: round-trips through a smart trigger', () => { + const desc = 'Refactor peer sync\nwith workspace auth.'; + const t = buildOnboardingTrigger({ description: desc }); + assert.equal(extractDescription(t), desc); +}); + +test('extractDescription: returns empty string for a description-less trigger', () => { + assert.equal(extractDescription(ONBOARDING_TRIGGER_TEXT), ''); +}); + +test('extractDescription: tolerates nulls and non-strings', () => { + assert.equal(extractDescription(null), ''); + assert.equal(extractDescription(undefined), ''); + assert.equal(extractDescription(''), ''); + assert.equal(extractDescription({}), ''); +}); + +test('extractDescription: returns empty when markers are malformed (close before open)', () => { + const bad = `${DESCRIPTION_CLOSE} text ${DESCRIPTION_OPEN}`; + assert.equal(extractDescription(bad), ''); +}); + test('onboardingMarkerPath joins repo root with the relative marker path', () => { const root = mkRoot(); try { From 69261963735e33dc8d77561236a2dad6a97d53b9 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 12:26:19 +0200 Subject: [PATCH 08/21] agent-scope: allow agent-run pnpm task create to persist the manifest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The smart-onboarding flow ends with a plan-mode AskQuestion where the user explicitly approves the proposed scope. Until now, the agent still had to bounce the `pnpm task create` command back for the user to run manually — the afterShell hooks would otherwise delete the new manifest as an untracked write inside the protected `agent-scope/tasks/**` path. Add a narrow allowlist in both hooks (`.cursor/hooks/shell-diff-check.mjs` and `.claude/hooks/shell-diff-check.mjs`) driven by `extractTaskCreateId()` in `shell-parse.mjs`. When the shell command that just ran matches the canonical shapes — - pnpm task create ... - pnpm run task create ... - node agent-scope/bin/task.mjs create ... — and the id validates against the manifest-id regex, the hook lets the two specific files that command legitimately writes (`agent-scope/tasks/.json` and `agent-scope/active`) persist. Everything else inside the same turn — impostor `echo > ...`, `cp`, opaque evaluators, other task manifests, ids with path-escape chars, non-canonical wrappers (npm/yarn/bun), Write/Edit tool calls — is still reverted/deleted. Each waived write is audited to the denial log as `afterShell.approved-create`. Protocol docs (CLAUDE.md, .cursor/rules/agent-scope.mdc, AGENTS.md, README.md) are updated to say the agent runs `pnpm task create` itself on approve. - 14 new shell-parse tests (60 in the parse suite; 200 total). - E2E smoke (/tmp/allowlist-smoke.sh) covers 6 scenarios including chained commands and log auditing. Made-with: Cursor --- .claude/hooks/shell-diff-check.mjs | 31 ++++++- .cursor/hooks/shell-diff-check.mjs | 30 +++++- .cursor/rules/agent-scope.mdc | 17 ++-- AGENTS.md | 33 ++++--- CLAUDE.md | 20 ++-- agent-scope/README.md | 55 +++++++++-- agent-scope/lib/shell-parse.mjs | 103 ++++++++++++++++++++ agent-scope/lib/shell-parse.test.mjs | 134 +++++++++++++++++++++++++++ 8 files changed, 389 insertions(+), 34 deletions(-) diff --git a/.claude/hooks/shell-diff-check.mjs b/.claude/hooks/shell-diff-check.mjs index 3415e7e87..3bf2e9a69 100755 --- a/.claude/hooks/shell-diff-check.mjs +++ b/.claude/hooks/shell-diff-check.mjs @@ -24,11 +24,13 @@ const __dirname = dirname(__filename); const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; +const parseUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/shell-parse.mjs')).href; const { resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, checkNodeVersion, } = await import(scopeUrl); const { logDenial } = await import(logUrl); const { buildAfterShellContext } = await import(denialUrl); +const { extractTaskCreateId, approvedTaskCreateWrites } = await import(parseUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -83,12 +85,39 @@ async function main() { const porcelain = gitPorcelain(root); if (porcelain === null) return emit({}); + // Approved-task-create allowlist: if the command that just ran was + // `pnpm task create ` (or the canonical node equivalent), allow + // the two specific files that command legitimately writes — + // agent-scope/tasks/.json + // agent-scope/active + // Every other protected-path write still gets reverted/deleted. + const approvedId = extractTaskCreateId(command); + const approvedWrites = approvedTaskCreateWrites(approvedId); + const approved = []; + const entries = parsePorcelain(porcelain); const outOfScope = entries.filter(({ path }) => { if (!path) return false; const d = checkPath(task, path, root); - return d === 'deny' || d === 'protected'; + if (d !== 'deny' && d !== 'protected') return false; + if (approvedWrites.has(path)) { approved.push(path); return false; } + return true; }); + + if (approved.length) { + for (const p of approved) { + logDenial(root, { + event: 'afterShell.approved-create', + tool: 'Bash', + path: p, + task: approvedId, + command, + sessionId, + agent: 'claude-code', + }); + } + } + if (outOfScope.length === 0) return emit({}); const reverted = []; diff --git a/.cursor/hooks/shell-diff-check.mjs b/.cursor/hooks/shell-diff-check.mjs index b73b3c75d..e149605fc 100755 --- a/.cursor/hooks/shell-diff-check.mjs +++ b/.cursor/hooks/shell-diff-check.mjs @@ -19,11 +19,13 @@ const __dirname = dirname(__filename); const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; +const parseUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/shell-parse.mjs')).href; const { resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, checkNodeVersion, } = await import(scopeUrl); const { logDenial } = await import(logUrl); const { buildAfterShellContext } = await import(denialUrl); +const { extractTaskCreateId, approvedTaskCreateWrites } = await import(parseUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -73,12 +75,38 @@ async function main() { const porcelain = gitPorcelain(root); if (porcelain === null) return emit({}); + // Approved-task-create allowlist: if the command that just ran was + // `pnpm task create ` (or the canonical node equivalent), we allow + // the two specific files that command legitimately writes — + // agent-scope/tasks/.json + // agent-scope/active + // Every other protected-path write still gets reverted/deleted. + const approvedId = extractTaskCreateId(command); + const approvedWrites = approvedTaskCreateWrites(approvedId); + const approved = []; + const entries = parsePorcelain(porcelain); const outOfScope = entries.filter(({ path }) => { if (!path) return false; const d = checkPath(task, path, root); - return d === 'deny' || d === 'protected'; + if (d !== 'deny' && d !== 'protected') return false; + if (approvedWrites.has(path)) { approved.push(path); return false; } + return true; }); + + if (approved.length) { + for (const p of approved) { + logDenial(root, { + event: 'afterShell.approved-create', + tool: 'Shell', + path: p, + task: approvedId, + command, + sessionId, + }); + } + } + if (outOfScope.length === 0) return emit({}); const reverted = []; diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index 4f44fc01a..f80a9ca5b 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -144,10 +144,13 @@ Follow these steps exactly: - Your recommendation should appear inline in the prompt ("I recommend `approve`"). -5. **On `approve` + the Q1 package selection:** print a fenced bash block - with the **exact** command for the user to run in their own terminal. - The user must run it (not you) because the `afterShellExecution` hook - would delete an untracked file in `agent-scope/tasks/` if you did. +5. **On `approve` + the Q1 package selection:** **YOU (the agent) run the + command directly** via the Shell tool. The `afterShellExecution` hook + has a narrow allowlist for the canonical task-create invocation: + `pnpm task create ...` and `node agent-scope/bin/task.mjs create + ...` are the ONLY shapes whose writes to `agent-scope/tasks/.json` + and `agent-scope/active` may persist. The command's `--allowed` flags + must match Q1 verbatim (any divergence is a protocol violation): ```bash pnpm task create \ @@ -159,8 +162,10 @@ Follow these steps exactly: --activate ``` - Then say "Let me know when you've run that and I'll start." Wait for - the user to confirm with any short acknowledgement ("done", "go", "ok"). + After the command succeeds, continue with the actual work in the same + turn. Do NOT bounce the command back to the user — they already + approved the scope via AskQuestion. If the command fails, surface the + error and re-ask via AskQuestion instead of retrying blindly. 6. **On `show_json`:** print the drafted manifest JSON, then re-ask the same two questions. diff --git a/AGENTS.md b/AGENTS.md index be126130c..2c8f264ed 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -102,9 +102,10 @@ active: option. - **Q2 (single-select, "Action?"):** `approve`, `show_json`, `edit_globs`, `widen`, `narrow`, `cancel`, `custom_instruction`. -5. On `approve` + the Q1 package selection: print the **exact** command - for the user to run in their terminal (do NOT run it yourself — - see the warning below): +5. On `approve` + the Q1 package selection: **run the command yourself** + via your shell tool, then continue with the actual work in the same + turn. The command's `--allowed` flags must match Q1 verbatim (the user + approved a specific scope — altering it is a protocol violation): ```bash pnpm task create \ @@ -115,13 +116,23 @@ active: --activate ``` - Wait for them to confirm ("done" / "go"), then begin the actual work. - -> ⚠️ **Why YOU don't run `pnpm task create`** — on Cursor / Claude Code the -> `afterShellExecution` / PostToolUse Bash hook deletes any new file you -> create inside `agent-scope/tasks/**` (it's a protected path). Codex CLI -> doesn't have that hook so the file would persist there, but you should -> still defer to the user for consistency across agents. + If the command fails, surface the error and re-ask via AskQuestion + instead of retrying blindly. + +> 🛈 **On Cursor / Claude Code the `afterShellExecution` / PostToolUse Bash +> hook has a narrow allowlist** so this one invocation can persist the +> new `agent-scope/tasks/.json` and `agent-scope/active` files. The +> hook recognises only the canonical shapes: +> +> - `pnpm task create ...` +> - `pnpm run task create ...` +> - `node agent-scope/bin/task.mjs create ...` +> +> Any other write to `agent-scope/tasks/**` or `agent-scope/active` (e.g. +> `echo ... > agent-scope/tasks/evil.json`, `cp`, Write/Edit tool, opaque +> evaluators) is still reverted/deleted as before. Agents without hooks +> (Codex CLI, Gemini CLI, etc.) have no hook gate to worry about; just +> follow the protocol. ## Plan-mode denial protocol @@ -159,7 +170,7 @@ pnpm task show # show the active task and its scope pnpm task set # set the active task pnpm task clear # clear the active task pnpm task check # check a path against the active task -pnpm task create [flags] # create a manifest non-interactively (USER runs) +pnpm task create [flags] # create a manifest non-interactively (agent runs on approve, allowlisted by hooks) pnpm task validate # validate all manifests pnpm task audit [--since N] # show recent denials pnpm task resolve # debug: show how the active task is resolved diff --git a/CLAUDE.md b/CLAUDE.md index 81a93a047..cd8918324 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -280,10 +280,14 @@ protocol below. match exactly): `approve`, `show_json`, `edit_globs`, `widen`, `narrow`, `cancel`, `custom_instruction`. Recommend `approve` in the prompt. -5. **On `approve`** + the Q1 package selection, print a fenced bash block - with the **exact** command for the user to run in their terminal (not - you — the `afterShellExecution` hook would delete a new manifest file - you created yourself): +5. **On `approve`** + the Q1 package selection, **YOU (the agent) run the + command yourself** via the Shell tool. The `afterShellExecution` hook + has a narrow allowlist for exactly this invocation: a canonical + `pnpm task create ...` or + `node agent-scope/bin/task.mjs create ...` is the ONLY shape that + may persist new files under `agent-scope/tasks/` and `agent-scope/active`. + The command must match Q1's package selection verbatim (any deviation + is a protocol violation — the user approved a specific scope): ```bash pnpm task create \ @@ -294,7 +298,11 @@ protocol below. --activate ``` - Wait for them to confirm ("done"/"go"), then start the actual work. + After the command succeeds (exit 0, manifest shown), continue with the + actual work in the same turn. Do NOT bounce the command back to the + user — they already approved it via the AskQuestion. If the command + fails for any reason (schema error, collision, etc.), surface the + error and re-ask via AskQuestion instead of retrying blindly. 6. **On `show_json`**, print the drafted manifest, then re-ask both questions. 7. **On `edit_globs` / `widen` / `narrow`**, ask one targeted follow-up in @@ -366,7 +374,7 @@ AskQuestion prompt (see below). ``` pnpm task start # interactive wizard (default) — user runs this; writes + activates manifest directly pnpm task start --smart # user pastes description in CLI; agent proposes scope in chat -pnpm task create [flags] # non-interactive manifest build — USER runs this +pnpm task create [flags] # non-interactive manifest build — agent runs this on approve (allowlisted) pnpm task list | show | set | clear | check | audit | resolve pnpm scope:status | scope:validate | scope:test ``` diff --git a/agent-scope/README.md b/agent-scope/README.md index 3106b15d7..37c81594f 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -34,9 +34,11 @@ You engage the system in one of four ways: one-shot marker that embeds that description and copies the trigger text to your clipboard. The agent reads your description, explores the repo semantically, and proposes a scope via a rich two-part - `AskQuestion` (multi-select packages + single-select action), then - prints the exact `pnpm task create` command for you to run. Use this - when you want the agent to do the thinking. + `AskQuestion` (multi-select packages + single-select action), and on + `approve` runs `pnpm task create` itself (the `afterShellExecution` + hook has a narrow allowlist for the canonical invocation — see + "Architecture / approved-task-create allowlist" below). Use this when + you want the agent to do the thinking. 3. **Explicit** — `pnpm task set ` activates a manifest you already have. 4. **Direct** — `pnpm task create --description "..." --allowed "..." --activate` @@ -63,6 +65,40 @@ All four agent-facing layers use the same library other up, so destructive commands that slip past the pre-check get reverted or deleted afterwards. +### Approved-task-create allowlist + +The after-shell hooks include a narrow, audited allowlist so the agent +can finish the smart-onboarding flow itself — i.e. on plan-mode +`approve`, the agent runs `pnpm task create ...` and the hook lets +the resulting `agent-scope/tasks/.json` plus `agent-scope/active` +persist. + +The allowlist is: + +- **Deterministic** — `agent-scope/lib/shell-parse.mjs` (`extractTaskCreateId`) + tokenises the command and only matches canonical shapes: + `pnpm task create `, `pnpm run task create `, or + `node agent-scope/bin/task.mjs create `. Impostors like + `echo ... > agent-scope/tasks/evil.json`, `cp`, opaque evaluators + (`node -e`, `python -c`), `npm`/`yarn`/`bun` wrappers, or ids with + path-escape chars (`..`, `/`, `.`, spaces) return `null` and fall back + to the default revert/delete behaviour. +- **Narrow** — even with a valid id, only two paths are waived: + `agent-scope/tasks/.json` (that specific id) and `agent-scope/active`. + Other files written inside `agent-scope/tasks/**` in the same turn + (including other task manifests) are still reverted/deleted. +- **Audited** — every approved write is logged to + `agent-scope/logs/denials.jsonl` as an `afterShell.approved-create` + event alongside the command and task id. +- **Validated** — the CLI itself rejects invalid ids and schema errors, + so a syntactically-invalid manifest never reaches disk for the hook to + allow. + +This keeps the smart-onboarding UX one-step (agent runs the command +after you click Approve) without weakening protection: every non-matching +write to `agent-scope/tasks/**` and `agent-scope/active` is still +immediately reverted. + ## Concepts | Concept | File | Description | @@ -268,12 +304,13 @@ The agent then follows a fixed protocol (defined in already decided to include are pre-selected. - **Q2 — action (single-select):** `approve`, `show_json`, `edit_globs`, `widen`, `narrow`, `cancel`, `custom_instruction`. -4. On `approve`, prints the exact `pnpm task create` command for you to - run. -5. You run it in your terminal (not the agent — otherwise the - `afterShellExecution` hook would delete the new manifest as an - untracked file in a protected path). -6. The agent starts the real work. +4. On `approve`, the agent itself runs `pnpm task create ...` via + the shell tool. The `afterShellExecution` / PostToolUse-Bash hooks + recognise the canonical task-create invocation and allow its two + specific writes (`agent-scope/tasks/.json` and `agent-scope/active`) + to persist; every other write to those paths is still reverted. See + the "approved-task-create allowlist" section for details. +5. The agent starts the real work in the same turn. From here, every attempted write to an out-of-scope file triggers a plan-mode AskQuestion menu — see **Escalation** below. diff --git a/agent-scope/lib/shell-parse.mjs b/agent-scope/lib/shell-parse.mjs index 9667dc4ed..612cddd4c 100644 --- a/agent-scope/lib/shell-parse.mjs +++ b/agent-scope/lib/shell-parse.mjs @@ -183,3 +183,106 @@ export function bodyTouchesProtected(body, protectedPatterns) { const literals = literalsFromProtected(protectedPatterns); return literals.some(lit => lit && body.includes(lit)); } + +// --------------------------------------------------------------------------- +// Approved-task-create detector +// --------------------------------------------------------------------------- +// +// The afterShell hook normally reverts / deletes any writes to protected +// paths (including `agent-scope/tasks/**`). That's the right default — we +// don't want the agent to silently mint itself a wider task scope. +// +// BUT: the smart-onboarding protocol ends with a plan-mode `AskQuestion` +// where the USER explicitly approves the proposed scope. Post-approval, +// the agent should be able to run `pnpm task create ...` directly +// without bouncing the command back to the user to copy-paste. +// +// This function extracts the task id from the command IFF the command +// invokes the canonical task-create CLI. The hook uses that id to +// allowlist only two specific files: +// +// - agent-scope/tasks/.json (the manifest the CLI just wrote) +// - agent-scope/active (set when --activate was passed) +// +// Everything else in `agent-scope/**` (hooks, library, bin, other tasks) +// stays hard-protected. Writes to ANY other path still get reverted. +// +// Supported invocations: +// pnpm task create [flags] +// pnpm run task create [flags] +// node agent-scope/bin/task.mjs create [flags] +// node ./agent-scope/bin/task.mjs create [flags] +// +// Returns: the task id string, or null if the command is not a valid +// task-create invocation. Task id validation matches the JSON schema +// (kebab-case, alphanumerics + hyphens/underscores, 1-64 chars). + +const TASK_ID_RE = /^[a-zA-Z0-9][a-zA-Z0-9_-]{0,63}$/; + +function looksLikePnpm(t) { + if (!t) return false; + const tail = t.split('/').pop(); + return tail === 'pnpm'; +} + +function looksLikeNode(t) { + if (!t) return false; + const tail = t.split('/').pop(); + return tail === 'node' || tail === 'node.exe'; +} + +function looksLikeTaskJs(t) { + if (!t) return false; + const norm = t.replace(/^\.\//, ''); + return ( + norm === 'agent-scope/bin/task.mjs' || + norm.endsWith('/agent-scope/bin/task.mjs') + ); +} + +// Extract the id from a single tokenized subcommand, or null. +function extractIdFromTokens(tokens) { + if (!tokens?.length) return null; + + if (looksLikePnpm(tokens[0])) { + let i = 1; + if (tokens[i] === 'run') i++; + if (tokens[i] !== 'task') return null; + if (tokens[i + 1] !== 'create') return null; + const id = tokens[i + 2]; + return id && TASK_ID_RE.test(id) ? id : null; + } + + if (looksLikeNode(tokens[0])) { + if (!looksLikeTaskJs(tokens[1])) return null; + if (tokens[2] !== 'create') return null; + const id = tokens[3]; + return id && TASK_ID_RE.test(id) ? id : null; + } + + return null; +} + +// Scan the full command (which may contain multiple sub-commands joined +// with `&&` / `||` / `;` / `|`) and return the FIRST approved task-create +// id we find, or null. +export function extractTaskCreateId(command) { + if (typeof command !== 'string' || !command.trim()) return null; + for (const sub of splitCommands(command)) { + const tokens = tokenize(sub); + const id = extractIdFromTokens(tokens); + if (id) return id; + } + return null; +} + +// Given a task id, return the POSIX relative paths the afterShell hook +// should allow past protection. Always includes the manifest and the +// `active` file. Stable sort / lowercase for comparison callers. +export function approvedTaskCreateWrites(id) { + if (!id || !TASK_ID_RE.test(id)) return new Set(); + return new Set([ + `agent-scope/tasks/${id}.json`, + 'agent-scope/active', + ]); +} diff --git a/agent-scope/lib/shell-parse.test.mjs b/agent-scope/lib/shell-parse.test.mjs index c5e5e43e1..7f8f7aeaa 100644 --- a/agent-scope/lib/shell-parse.test.mjs +++ b/agent-scope/lib/shell-parse.test.mjs @@ -7,6 +7,7 @@ import { splitCommands, tokenize, extractRedirections, extractDestructiveTargets, extractFindTargets, extractXargsTarget, extractNestedShellBody, extractOpaqueBody, bodyHasWriteIntent, bodyTouchesProtected, + extractTaskCreateId, approvedTaskCreateWrites, } from './shell-parse.mjs'; import { PROTECTED_PATTERNS } from './scope.mjs'; @@ -246,3 +247,136 @@ test('scenario: benign node command (read-only) is not flagged', () => { // Body references protected path but has no write intent → not flagged. assert.equal(bodyHasWriteIntent(opaque.body), false); }); + +// --- extractTaskCreateId + approvedTaskCreateWrites ---------------------- +// The afterShell hook uses these to allow the ONE specific write that a +// `pnpm task create ` invocation legitimately produces. Everything +// else under agent-scope/** still gets reverted. + +test('extractTaskCreateId: pnpm task create returns the id', () => { + assert.equal( + extractTaskCreateId('pnpm task create peer-sync-auth --inherits base --activate'), + 'peer-sync-auth', + ); +}); + +test('extractTaskCreateId: pnpm run task create returns the id', () => { + assert.equal( + extractTaskCreateId('pnpm run task create my-id'), + 'my-id', + ); +}); + +test('extractTaskCreateId: node agent-scope/bin/task.mjs create ', () => { + assert.equal( + extractTaskCreateId('node agent-scope/bin/task.mjs create my-id --activate'), + 'my-id', + ); +}); + +test('extractTaskCreateId: node ./agent-scope/bin/task.mjs create ', () => { + assert.equal( + extractTaskCreateId('node ./agent-scope/bin/task.mjs create my-id'), + 'my-id', + ); +}); + +test('extractTaskCreateId: absolute node path works', () => { + assert.equal( + extractTaskCreateId('/usr/local/bin/node agent-scope/bin/task.mjs create my-id'), + 'my-id', + ); +}); + +test('extractTaskCreateId: absolute pnpm path works', () => { + assert.equal( + extractTaskCreateId('/opt/homebrew/bin/pnpm task create my-id --activate'), + 'my-id', + ); +}); + +test('extractTaskCreateId: quoted id is accepted', () => { + assert.equal( + extractTaskCreateId('pnpm task create "my-id" --description "x"'), + 'my-id', + ); +}); + +test('extractTaskCreateId: chained command — picks up the create sub-command', () => { + assert.equal( + extractTaskCreateId('git add . && pnpm task create my-id --activate'), + 'my-id', + ); + assert.equal( + extractTaskCreateId('pnpm task create my-id --activate && pnpm task show'), + 'my-id', + ); +}); + +test('extractTaskCreateId: pnpm task list / set / clear / show → null', () => { + assert.equal(extractTaskCreateId('pnpm task list'), null); + assert.equal(extractTaskCreateId('pnpm task set my-id'), null); + assert.equal(extractTaskCreateId('pnpm task clear'), null); + assert.equal(extractTaskCreateId('pnpm task show'), null); +}); + +test('extractTaskCreateId: missing id → null', () => { + assert.equal(extractTaskCreateId('pnpm task create'), null); + assert.equal(extractTaskCreateId('pnpm task create --activate'), null); +}); + +test('extractTaskCreateId: id starts with hyphen (flag-like) → null', () => { + assert.equal(extractTaskCreateId('pnpm task create --my-id'), null); + assert.equal(extractTaskCreateId('pnpm task create -foo'), null); +}); + +test('extractTaskCreateId: id with path-escape / special chars → null', () => { + assert.equal(extractTaskCreateId('pnpm task create ../evil'), null); + assert.equal(extractTaskCreateId('pnpm task create my/id'), null); + assert.equal(extractTaskCreateId('pnpm task create "my id"'), null); + assert.equal(extractTaskCreateId('pnpm task create my.id'), null); + assert.equal(extractTaskCreateId('pnpm task create $(whoami)'), null); +}); + +test('extractTaskCreateId: impostor that writes to tasks/ directly → null', () => { + assert.equal( + extractTaskCreateId('echo \'{"id":"evil"}\' > agent-scope/tasks/evil.json'), + null, + ); + assert.equal( + extractTaskCreateId('cp /tmp/m.json agent-scope/tasks/evil.json'), + null, + ); + assert.equal( + extractTaskCreateId("node -e \"require('fs').writeFileSync('agent-scope/tasks/evil.json', '{}')\""), + null, + ); +}); + +test('extractTaskCreateId: non-canonical scripts → null (forces bootstrap)', () => { + assert.equal(extractTaskCreateId('npm run task create my-id'), null); + assert.equal(extractTaskCreateId('yarn task create my-id'), null); + assert.equal(extractTaskCreateId('bun task create my-id'), null); +}); + +test('extractTaskCreateId: non-string / empty → null', () => { + assert.equal(extractTaskCreateId(''), null); + assert.equal(extractTaskCreateId(null), null); + assert.equal(extractTaskCreateId(undefined), null); + assert.equal(extractTaskCreateId(42), null); + assert.equal(extractTaskCreateId(' '), null); +}); + +test('approvedTaskCreateWrites: returns manifest + active paths for valid id', () => { + const s = approvedTaskCreateWrites('peer-sync-auth'); + assert.equal(s.size, 2); + assert.ok(s.has('agent-scope/tasks/peer-sync-auth.json')); + assert.ok(s.has('agent-scope/active')); +}); + +test('approvedTaskCreateWrites: invalid / empty id → empty set', () => { + assert.equal(approvedTaskCreateWrites(null).size, 0); + assert.equal(approvedTaskCreateWrites('').size, 0); + assert.equal(approvedTaskCreateWrites('../evil').size, 0); + assert.equal(approvedTaskCreateWrites('a/b').size, 0); +}); From 499757e8381f04bc10088eb2fcb6206f8c333bff Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 12:39:32 +0200 Subject: [PATCH 09/21] agent-scope: single-Enter submission for smart-mode description MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously `pnpm task start --smart` required two Enter presses to submit a description: one to end the line, one to signal "done". The reader waited for a blank line after seeing content, which was cumbersome for the common case of a short one-line task summary. Fix: single-Enter submits immediately. Multi-line pastes are still captured in full via paste-detection — terminals deliver each pasted line as a separate `line` event within a few ms, so after the first blocking line read we poll `tryReadLine(80ms)` and only stop once no new line arrives inside that quiet window. Trailing blank lines from pastes are trimmed; blank lines inside a paste (paragraph breaks) are preserved. Implementation: - New `tryReadLine(timeoutMs)` primitive on the prompter that cleans up its own waiter on timeout (no leak if the line never arrives). - New `askPasteableDescription(prompt, opts)` prompter method composing blocking first-line read + paste-detection tail. Configurable quiet window, line cap, and leading-blank tolerance. - `readMultilineDescription()` in `bin/task.mjs` replaced with a direct call to `prompter.askPasteableDescription('> ')`. - CLI copy updated: "Finish with an empty line." → "Press Enter to submit. (Multi-line pastes are captured in full.)" - README onboarding-flow section updated to match. Tests: 16 new cases in `agent-scope/lib/prompter.test.mjs` covering tryReadLine (buffered/timeout/late/closed/no-steal) and askPasteableDescription (single-line, multi-line paste, paragraph breaks, trailing-blank trim, leading-blank tolerance, bail path, inside-window late line, outside-window late line, maxLines cap on runaway input). Added to `scope:test` — full suite: 216 tests, <1s. Made-with: Cursor --- agent-scope/README.md | 5 +- agent-scope/bin/task.mjs | 32 +---- agent-scope/lib/prompter.mjs | 71 +++++++++- agent-scope/lib/prompter.test.mjs | 221 ++++++++++++++++++++++++++++++ package.json | 2 +- 5 files changed, 299 insertions(+), 32 deletions(-) create mode 100644 agent-scope/lib/prompter.test.mjs diff --git a/agent-scope/README.md b/agent-scope/README.md index 37c81594f..c49d1002c 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -275,8 +275,9 @@ onboarding modes need interactive input. ### Flow 2 — smart onboarding (`pnpm task start --smart`) -The CLI prompts you for a multi-line task description (finish with an -empty line), then drops a one-shot marker at +The CLI prompts you for a task description (press Enter to submit — +multi-line pastes are captured in full via paste-detection), then drops +a one-shot marker at `agent-scope/.pending-onboarding` (gitignored) that *already embeds* your description inside a `=== USER TASK DESCRIPTION ===` block. The trigger text is also copied to your clipboard. diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index 7ef46082e..71acb97bb 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -266,11 +266,11 @@ async function startSmart() { const prompter = createPrompter(); let description = ''; try { - console.log('Describe the task in detail — what to build or fix, which packages / behaviours'); - console.log('/ tests, and any files you already know about. Multi-line OK.'); - console.log('Finish with an empty line.'); + console.log('Describe the task — what to build or fix, which packages / behaviours'); + console.log('/ tests, and any files you already know about.'); + console.log('Press Enter to submit. (Multi-line pastes are captured in full.)'); console.log(''); - description = await readMultilineDescription(prompter); + description = await prompter.askPasteableDescription('> '); } finally { prompter.close(); } @@ -312,30 +312,6 @@ async function startSmart() { bootstrapWarning(); } -async function readMultilineDescription(prompter) { - // Read lines until we see a single empty line AFTER at least one non- - // empty line. Lets the user paste multi-paragraph text (paste usually - // ends with a blank line) or type naturally and hit Enter twice. - // - // Safety rails: cap iterations and stop on consecutive blanks before - // any content (prevents runaway loops when stdin is closed / EOF). - const lines = []; - let seenContent = false; - let blankRun = 0; - for (let i = 0; i < 2000; i++) { - const line = await prompter.ask('> '); - if (!line || !line.trim()) { - if (seenContent) break; - if (++blankRun >= 3) break; - continue; - } - blankRun = 0; - lines.push(line); - seenContent = true; - } - return lines.join('\n'); -} - async function startInteractive() { console.log('agent-scope: interactive task wizard'); console.log(' (no agent needed — hit Ctrl+C any time to cancel, nothing is saved until the final "save" step.)'); diff --git a/agent-scope/lib/prompter.mjs b/agent-scope/lib/prompter.mjs index 321a4b7e9..fa870b917 100644 --- a/agent-scope/lib/prompter.mjs +++ b/agent-scope/lib/prompter.mjs @@ -34,6 +34,31 @@ export function createPrompter({ waiters.push(r); }); + // Non-blocking: resolves with the next line if one arrives within + // `timeoutMs`, otherwise null. Used for paste-detection where we want + // to treat typed-and-Enter input as single-line but still capture + // pasted multi-line content (terminal pastes deliver each line as a + // separate `line` event within a few milliseconds). + const tryReadLine = (timeoutMs) => new Promise(resolve => { + if (buffered.length) return resolve(buffered.shift()); + if (closed) return resolve(null); + let settled = false; + const waiter = (line) => { + if (settled) return; + settled = true; + clearTimeout(t); + resolve(line); + }; + waiters.push(waiter); + const t = setTimeout(() => { + if (settled) return; + settled = true; + const idx = waiters.indexOf(waiter); + if (idx >= 0) waiters.splice(idx, 1); + resolve(null); + }, timeoutMs); + }); + const write = (s) => { try { output.write(s); } catch { /* ignore */ } }; async function ask(prompt, { default: dflt = '' } = {}) { @@ -80,6 +105,47 @@ export function createPrompter({ return [...new Set(nums)].sort((a, b) => a - b); } + // Read a task description with single-Enter submission and paste + // detection. Flow: + // 1. Block for the first non-empty line. Blank lines before any + // content are ignored, up to `maxBlankBeforeContent`. + // 2. After the first line, poll `tryReadLine(pasteQuietMs)` — if + // another line arrives inside that window it's part of a multi- + // line paste (terminal pastes deliver each line as a separate + // `line` event within a few ms). Keep appending; each new line + // resets the window. + // 3. As soon as the quiet window expires with no new line, stop. + // + // This means typing one line + Enter submits immediately (no more + // "press Enter twice"), while a multi-paragraph paste still gets + // captured in full. Trailing blank lines (common at the end of a + // paste) are trimmed. Internal blank lines (paragraph breaks) are + // preserved. + async function askPasteableDescription(prompt = '> ', { + pasteQuietMs = 80, + maxLines = 2000, + maxBlankBeforeContent = 3, + } = {}) { + const lines = []; + let emptyBeforeContent = 0; + + while (lines.length === 0) { + const line = await ask(prompt); + if (line && line.trim().length) { lines.push(line); break; } + if (++emptyBeforeContent >= maxBlankBeforeContent) return ''; + } + + while (lines.length < maxLines) { + const next = await tryReadLine(pasteQuietMs); + if (next === null) break; + if (next === '') { lines.push(''); continue; } + lines.push(next); + } + + while (lines.length && !lines[lines.length - 1].trim()) lines.pop(); + return lines.join('\n'); + } + // Read free-text lines until a blank line. Useful for "extra globs". async function askLines(headline, { hint } = {}) { if (headline) write(headline + '\n'); @@ -98,5 +164,8 @@ export function createPrompter({ function close() { try { rl.close(); } catch { /* ignore */ } } - return { ask, askYesNo, askChoice, askMultiNumber, askLines, close }; + return { + ask, askYesNo, askChoice, askMultiNumber, askLines, + askPasteableDescription, tryReadLine, close, + }; } diff --git a/agent-scope/lib/prompter.test.mjs b/agent-scope/lib/prompter.test.mjs new file mode 100644 index 000000000..f54182513 --- /dev/null +++ b/agent-scope/lib/prompter.test.mjs @@ -0,0 +1,221 @@ +// Unit tests for the tiny prompter. Focuses on the paste-detection +// primitives used by `pnpm task start --smart`'s description reader — +// i.e. the `tryReadLine(timeoutMs)` helper and its interaction with +// the blocking `ask()` path. + +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { PassThrough } from 'node:stream'; +import { createPrompter } from './prompter.mjs'; + +function makePrompter() { + const input = new PassThrough(); + const output = new PassThrough(); + // Drain output so writes don't back-pressure the PassThrough. + output.on('data', () => {}); + const p = createPrompter({ input, output }); + return { p, input, output }; +} + +function feed(input, line) { input.write(`${line}\n`); } + +test('tryReadLine: buffered line resolves synchronously (same tick)', async () => { + const { p, input } = makePrompter(); + feed(input, 'first'); + // Give the readline transform a tick to push the line event. + await new Promise(r => setImmediate(r)); + const got = await p.tryReadLine(500); + assert.equal(got, 'first'); + p.close(); +}); + +test('tryReadLine: returns null after timeout when no input', async () => { + const { p } = makePrompter(); + const t0 = Date.now(); + const got = await p.tryReadLine(60); + const elapsed = Date.now() - t0; + assert.equal(got, null); + // Should settle promptly — allow generous slack for slow CI. + assert.ok(elapsed >= 55, `expected >=55ms, got ${elapsed}`); + assert.ok(elapsed <= 400, `expected <=400ms, got ${elapsed}`); + p.close(); +}); + +test('tryReadLine: resolves when line arrives inside the window', async () => { + const { p, input } = makePrompter(); + setTimeout(() => feed(input, 'late-but-not-too-late'), 20); + const got = await p.tryReadLine(200); + assert.equal(got, 'late-but-not-too-late'); + p.close(); +}); + +test('tryReadLine: does NOT steal from later waiters after timeout', async () => { + const { p, input } = makePrompter(); + + // First call times out because nothing arrives. + const first = await p.tryReadLine(40); + assert.equal(first, null); + + // Now a real line arrives — it should route to the next reader, + // not some ghost of the timed-out waiter. + feed(input, 'hello'); + const got = await p.tryReadLine(200); + assert.equal(got, 'hello'); + p.close(); +}); + +test('tryReadLine: resolves null once the stream has been closed', async () => { + const { p, input } = makePrompter(); + input.end(); + // Let the readline 'close' event propagate. + await new Promise(r => setImmediate(r)); + const got = await p.tryReadLine(100); + assert.equal(got, null); + p.close(); +}); + +test('ask + tryReadLine compose: first line blocks, then we poll the tail', async () => { + const { p, input } = makePrompter(); + + // Mimic the smart-mode description reader: block for the first line, + // then collect any immediately-following lines (paste-detection). + setTimeout(() => { + feed(input, 'line A'); + feed(input, 'line B'); + feed(input, 'line C'); + }, 5); + + const first = await p.ask('> '); + const more = []; + for (;;) { + const next = await p.tryReadLine(40); + if (next === null) break; + more.push(next); + } + assert.equal(first, 'line A'); + assert.deepEqual(more, ['line B', 'line C']); + p.close(); +}); + +test('ask returns blank when stream closes with no input', async () => { + const { p, input } = makePrompter(); + setTimeout(() => input.end(), 10); + const got = await p.ask('> '); + assert.equal(got, ''); + p.close(); +}); + +// --- askPasteableDescription: single-Enter submission + paste detection --- + +test('askPasteableDescription: single line + one Enter submits immediately', async () => { + const { p, input } = makePrompter(); + setTimeout(() => feed(input, 'Refactor peer sync for workspace auth'), 5); + const got = await p.askPasteableDescription('> ', { pasteQuietMs: 40 }); + assert.equal(got, 'Refactor peer sync for workspace auth'); + p.close(); +}); + +test('askPasteableDescription: multi-line paste is captured in full', async () => { + const { p, input } = makePrompter(); + setTimeout(() => { + feed(input, 'line one'); + feed(input, 'line two'); + feed(input, 'line three'); + }, 5); + const got = await p.askPasteableDescription('> ', { pasteQuietMs: 60 }); + assert.equal(got, 'line one\nline two\nline three'); + p.close(); +}); + +test('askPasteableDescription: blank line in middle of paste is preserved', async () => { + const { p, input } = makePrompter(); + setTimeout(() => { + feed(input, 'paragraph 1'); + feed(input, ''); + feed(input, 'paragraph 2'); + }, 5); + const got = await p.askPasteableDescription('> ', { pasteQuietMs: 60 }); + assert.equal(got, 'paragraph 1\n\nparagraph 2'); + p.close(); +}); + +test('askPasteableDescription: trailing blank lines are trimmed', async () => { + const { p, input } = makePrompter(); + setTimeout(() => { + feed(input, 'content'); + feed(input, ''); + feed(input, ''); + }, 5); + const got = await p.askPasteableDescription('> ', { pasteQuietMs: 50 }); + assert.equal(got, 'content'); + p.close(); +}); + +test('askPasteableDescription: leading blanks ignored up to maxBlankBeforeContent', async () => { + const { p, input } = makePrompter(); + setTimeout(() => { + feed(input, ''); + feed(input, ''); + feed(input, 'finally'); + }, 5); + const got = await p.askPasteableDescription('> ', { + pasteQuietMs: 50, + maxBlankBeforeContent: 5, + }); + assert.equal(got, 'finally'); + p.close(); +}); + +test('askPasteableDescription: bails empty-string after maxBlankBeforeContent', async () => { + const { p, input } = makePrompter(); + setTimeout(() => { + feed(input, ''); + feed(input, ''); + feed(input, ''); + }, 5); + const got = await p.askPasteableDescription('> ', { + pasteQuietMs: 50, + maxBlankBeforeContent: 3, + }); + assert.equal(got, ''); + p.close(); +}); + +test('askPasteableDescription: late-arriving line INSIDE quiet window is appended', async () => { + const { p, input } = makePrompter(); + setTimeout(() => feed(input, 'first'), 5); + setTimeout(() => feed(input, 'second (just inside window)'), 40); + const got = await p.askPasteableDescription('> ', { pasteQuietMs: 100 }); + assert.equal(got, 'first\nsecond (just inside window)'); + p.close(); +}); + +test('askPasteableDescription: line arriving AFTER quiet window is NOT appended', async () => { + const { p, input } = makePrompter(); + setTimeout(() => feed(input, 'only this'), 5); + // Give enough time for the first read + the quiet window to elapse + // before sending the second line. + setTimeout(() => feed(input, 'too late, separate turn'), 200); + const got = await p.askPasteableDescription('> ', { pasteQuietMs: 40 }); + assert.equal(got, 'only this'); + p.close(); +}); + +test('askPasteableDescription: respects maxLines cap on a runaway paste', async () => { + const { p, input } = makePrompter(); + // Keep feeding lines forever (every few ms) — cap stops the reader. + let i = 0; + const iv = setInterval(() => feed(input, `L${i++}`), 5); + try { + const got = await p.askPasteableDescription('> ', { + pasteQuietMs: 40, + maxLines: 5, + }); + const lines = got.split('\n'); + assert.equal(lines.length, 5); + assert.ok(lines.every(l => /^L\d+$/.test(l)), `unexpected lines: ${got}`); + } finally { + clearInterval(iv); + p.close(); + } +}); diff --git a/package.json b/package.json index 904783c43..199c1db07 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ "test:game:e2e": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:e2e", "test:all": "pnpm test && pnpm test:evm && pnpm test:game:ui", "task": "node agent-scope/bin/task.mjs", - "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs agent-scope/lib/check-agent.test.mjs agent-scope/lib/wizard.test.mjs", + "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs agent-scope/lib/check-agent.test.mjs agent-scope/lib/wizard.test.mjs agent-scope/lib/prompter.test.mjs", "scope:validate": "node agent-scope/bin/task.mjs validate", "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show", "scope:check-agent": "node agent-scope/bin/task.mjs check-agent" From ba7da991adccf045f19bc04cb9ef52f8f752bf88 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 12:54:59 +0200 Subject: [PATCH 10/21] agent-scope: one-question / two-option plan-mode prompts (simpler UX) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The plan-mode AskQuestion surface was too busy. Onboarding asked a two-part question with a multi-select package list plus a seven-option action menu. Denials asked agents to include a 5-bullet prompt (denied path + why restricted + agent reasoning + recommendation + full options list), where "full options" could mean up to 6 verbose entries like `Add "packages/foo/bar.ts" to my-task's manifest`. The prose carried ALL-CAPS banners ("PROTECTED PATH —", "OUT OF TASK SCOPE —", "STOP.") and meta copy ("Agent: surface the menu below via AskQuestion"). On both ends — the user's and the LLM's — it read like a compliance form. The user asked for something closer to plan mode: one question, two options — the LLM's recommendation and "something else — tell me what" — phrased like a human chatting with a coworker. Changes: - `agent-scope/lib/denial.mjs` now emits two new fields in every structured denial payload: - `humanSummary`: one or two natural-language sentences describing the situation. Agents are told to quote it verbatim in the AskQuestion prompt. Replaces the old multi-block prose (banners, "Why this file is guarded", "What happens if the user says YES/NO", file lists, STOP notices, agent-directed copy). - `simpleOptions`: always exactly two entries — the recommended action (with a short casual label like "Add this folder to the task and try again", "Skip it", "Yes, unlock it so I can do this edit") and a `custom_instruction` free-text fallback labelled "Something else — tell me what". The verbose `options` list stays for audit / back-compat / tests but is not surfaced to the user. - Rendered prose is now a one-line `agent-scope: ` plus the fenced JSON. No banners. No agent-directed meta copy. After-shell context still lists reverted/deleted paths below the summary for reference. - `.cursor/rules/agent-scope.mdc`, `CLAUDE.md`, `AGENTS.md`: - Smart-onboarding step 4 is now "one AskQuestion, two options": `go` ("Yes, go with that") + `custom_instruction` ("Tell me what to change"). The prompt is a 3-sentence max rephrase + scope bullets + "Sound good?" - Denial protocol step 3 now reads: quote `humanSummary` verbatim, add one short sentence of your own reasoning, pass `simpleOptions` verbatim. Never surface the verbose `options` list. - New "Phrasing rules" section: no ALL-CAPS banners, no architecture explanations in prompts, one sentence of reasoning, no emoji unless the user uses them first. - `agent-scope/lib/onboarding.mjs` trigger payload matches the new protocol (was instructing the agent to ask Q1/Q2 and print the command for the user to run manually — both outdated). - `agent-scope/bin/task.mjs` description prompt is now the casual "What are you working on?" / "One or two sentences is plenty. Paste longer briefs if you have them." / "Press Enter to send." rather than the old "Describe the task — what to build or fix, which packages / behaviours / tests, and any files you already know about." - `agent-scope/README.md`: onboarding-flow + denial-menu sections rewritten to document `humanSummary` and `simpleOptions` as the user-facing surface, with a worked example. Tests: - `denial.test.mjs`: banner-era assertions (`OUT OF TASK SCOPE`, `Why this file is guarded`, `Reverted via`) replaced with asserts that `humanSummary` is present, short (<= 400 chars), mentions the denied path/task, has no banners, and is quoted in the rendered prose. Three new test cases cover the `simpleOptions` invariant (exactly two entries; first matches `recommendedOptionId`; second is always `custom_instruction`), the natural-label mapping (add_glob → "Add this folder to the task and try again", cancel → "Skip it", etc.), and the `humanSummary`-shape contract across all five builders. - Full suite: 219 tests, ~1.2s. Made-with: Cursor --- .cursor/rules/agent-scope.mdc | 197 ++++++++++++++---------------- AGENTS.md | 85 ++++++++----- CLAUDE.md | 139 +++++++++++---------- agent-scope/README.md | 75 +++++++----- agent-scope/bin/task.mjs | 6 +- agent-scope/lib/denial.mjs | 207 ++++++++++++++++++++------------ agent-scope/lib/denial.test.mjs | 126 +++++++++++++++++-- agent-scope/lib/onboarding.mjs | 52 ++++---- 8 files changed, 537 insertions(+), 350 deletions(-) diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index f80a9ca5b..820398fd4 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -112,45 +112,34 @@ Follow these steps exactly: - Inherits `base` unless there is a reason not to (base supplies the standard exemptions: `**/dist/**`, lockfiles, build artefacts) -4. **Propose the scope via a SINGLE `AskQuestion` call with TWO questions.** - - **Q1 — package picker (multi-select).** - - `id`: `packages` - - `allow_multiple`: `true` - - `prompt`: `"Which packages should be writable for this task?"` — - directly above the list, include a one-sentence rephrasing of the - user's description and the proposed task id. - - `options`: one entry per candidate package, labelled - `" files match"` with a sample of 2-3 relevant file - paths in the label where it helps. Pre-check the packages you've - already decided to include by listing them first and describing them - as the recommended selection in the prompt. - - Include a final option `other` labelled - `"None of the above / I'll specify files manually"` so the user can - escape the picker. - - **Q2 — action (single-select).** - - `id`: `action` +4. **Propose the scope via a SINGLE `AskQuestion` call with ONE question + and TWO options.** Keep it short and natural — like you're asking a + coworker, not filling out a form. Do NOT include a second question, do + NOT list every package individually, do NOT dump JSON into the prompt. + + - `id`: `scope` - `allow_multiple`: `false` - - `prompt`: `"Action?"` - - `options` (IDs must match exactly): - - `approve` — `"Create + activate this scope"` - - `show_json` — `"Show the full manifest JSON first"` - - `edit_globs` — `"Let me hand-edit the allowed/deny globs"` - - `widen` — `"Let me add another package/file"` - - `narrow` — `"Let me remove a package/file"` - - `cancel` — `"Abort, no task"` - - `custom_instruction` — `"Let me type my own instruction"` - - Your recommendation should appear inline in the prompt - ("I recommend `approve`"). - -5. **On `approve` + the Q1 package selection:** **YOU (the agent) run the - command directly** via the Shell tool. The `afterShellExecution` hook - has a narrow allowlist for the canonical task-create invocation: - `pnpm task create ...` and `node agent-scope/bin/task.mjs create - ...` are the ONLY shapes whose writes to `agent-scope/tasks/.json` - and `agent-scope/active` may persist. The command's `--allowed` flags - must match Q1 verbatim (any divergence is a protocol violation): + - `prompt`: two short sentences. First: a one-line rephrase of what you + understood the task to be. Second: the scope you'd propose, as a short + bullet list of globs (3–5 lines max). Finish with "Sound good?" Example: + + > Refactor peer sync to use the new workspace auth. I'd scope it to: + > • `packages/agent/**` + > • `packages/core/**` + > + > Sound good? + + - `options` (IDs must match exactly — only these two): + - `go` — `"Yes, go with that"` + - `custom_instruction` — `"Tell me what to change"` + +5. **On `go`:** **YOU (the agent) run the command directly** via the Shell + tool. The `afterShellExecution` hook has a narrow allowlist for the + canonical task-create invocation: `pnpm task create ...` and + `node agent-scope/bin/task.mjs create ...` are the ONLY shapes whose + writes to `agent-scope/tasks/.json` and `agent-scope/active` may + persist. Use the exact globs you proposed (any divergence is a protocol + violation): ```bash pnpm task create \ @@ -163,22 +152,13 @@ Follow these steps exactly: ``` After the command succeeds, continue with the actual work in the same - turn. Do NOT bounce the command back to the user — they already - approved the scope via AskQuestion. If the command fails, surface the - error and re-ask via AskQuestion instead of retrying blindly. - -6. **On `show_json`:** print the drafted manifest JSON, then re-ask the - same two questions. - -7. **On `edit_globs` / `widen` / `narrow`:** ask one targeted follow-up in - chat ("Which globs would you like to change / add / remove?"), apply - the user's answer to the draft, then re-ask the same two questions. - -8. **On `cancel`:** acknowledge and continue working without a task (the - system stays invisible). + turn. If the command fails, surface the error and re-ask via + AskQuestion instead of retrying blindly. -9. **On `custom_instruction`:** ask the user in plain chat what they want - instead, then do that. +6. **On `custom_instruction`:** ask the user in plain chat what they want + changed — e.g. `"What would you like different? (add/remove packages, + tighten globs, different task id, whatever.)"` Then apply their reply to + the draft and re-ask step 4 (still a single question, still two options). From step 5 onward, the active task is set and the plan-mode denial protocol (below) governs every future write. @@ -186,22 +166,14 @@ protocol (below) governs every future write. ## Plan-mode denial protocol — MANDATORY once a task is active Every denial message from agent-scope carries a structured menu. You must -**stop**, parse it, and surface it via `AskQuestion`. Do not retry, rewrite, -or work around a denial — the defense-in-depth layers will revert or delete -anything that slips through anyway. +**stop**, parse it, and surface it via `AskQuestion` — one question, two +options, short human prose. Do not retry, rewrite, or work around a denial +— the defense-in-depth layers will revert or delete anything that slips +through anyway. ### Detecting a denial -- `preToolUse` returned `{ permission: "deny" }` where `agent_message` starts - with `OUT OF TASK SCOPE` or `PROTECTED PATH`. -- `beforeShellExecution` returned `{ permission: "deny" }` whose - `agent_message` starts with `Destructive shell command blocked`. -- `afterShellExecution` returned an `additional_context` starting with - `agent-scope: shell command modified`. - -### Parsing the menu - -Every such message contains a fenced JSON block: +A denial always contains a fenced JSON block: ``` @@ -209,32 +181,34 @@ Every such message contains a fenced JSON block: ``` -The JSON has this shape (TypeScript for clarity): +Plus a one-line prose summary starting with `agent-scope:` that you can +also key off: + +- `preToolUse` / `beforeShellExecution` return `{ permission: "deny" }` + with an `agent_message` containing the fence. +- `afterShellExecution` returns an `additional_context` containing the + fence. Files have already been reverted or deleted. + +### Parsing the menu + +The JSON shape (TypeScript for clarity): ```ts { version: 1, hook: "preToolUse" | "beforeShellExecution" | "afterShellExecution", reason: "out-of-scope" | "protected" | "manifest-load-error" | "unknown", - deniedPath?: string, // preToolUse - command?: string, // shell hooks + humanSummary: string, // short, natural — QUOTE THIS in your prompt + deniedPath?: string, + command?: string, activeTask: string | null, - suggestedGlob?: string, - alternativeTasks?: { id: string, description: string }[], - violations?: { cmd, path, decision }[], // shell-precheck - reverted?: string[], // after-shell - deleted?: string[], // after-shell - options: { - id: string, - label: string, - action: { - kind: "add_to_manifest" | "switch_task" | "skip" | "cancel" - | "bootstrap" | "fix_manifest" | "clear_task" | "custom", - task?: string, patterns?: string[], instruction?: string, error?: string, - } - }[], - recommendedOptionId: string, // suggested default (out-of-scope → add_glob, protected → cancel, load-error → fix_manifest) - agentReasoning: null, // placeholder — you fill this in via the AskQuestion prompt, see below + simpleOptions: [ // exactly two entries — SURFACE THESE + { id: string, label: string, action: { kind: "..." /* + fields */ } }, + { id: "custom_instruction", label: "Something else — tell me what", + action: { kind: "custom" } } + ], + recommendedOptionId: string, // matches simpleOptions[0].id + options: [ /* full verbose list — do NOT surface to the user */ ], } ``` @@ -242,22 +216,22 @@ The JSON has this shape (TypeScript for clarity): 1. **Stop.** Do not retry the operation with another tool or command form. 2. **Extract the JSON** from between `agent-scope-menu:begin` and `:end`. -3. **Call `AskQuestion`** with a single question whose prompt **must include**: - - The denied path (or command). - - **Why it's restricted**: for `reason: "protected"` denials, read the - prose `Why this file is guarded` block and summarise it for the user - (use `protectedRole` / `protectedKind` from the structured JSON). For - `reason: "out-of-scope"` denials, state that the active task's manifest - does not list this path. - - **Your reasoning** in 1–2 sentences: why you wanted to touch this file, - what you were trying to accomplish. This is the plan-mode "here's what I - was thinking" that the user needs to make an informed decision. - - Your **recommendation**: which option you'd pick if you were the user, - and why. Lead with the JSON's `recommendedOptionId` unless you have a - concrete reason to override it. - - The options array, verbatim from the JSON — use each entry's `id` as the - option id and `label` as the display label. For protected denials the - labels are already phrased as Yes / No / No-but-continue / Custom. +3. **Call `AskQuestion`** with ONE question and TWO options: + - `prompt`: quote `humanSummary` verbatim, then add **one short + sentence** of your own reasoning (why you wanted to do this), then + finish with a simple ask. Keep the whole prompt to 3 sentences max. + Example: + + > I'd like to edit `packages/evm-module/contracts/S.sol`, but the + > active task `sync` doesn't cover that file. I was trying to update + > the staking integration referenced in the PR. Want me to add it to + > the scope and continue? + + - `options`: pass `simpleOptions` from the JSON, verbatim. It always + has exactly two entries — the recommended action and a free-text + fallback. Do NOT surface the verbose `options` list. Do NOT add or + reword options. + 4. **Act on the user's choice** using the matching `action` object: - `add_to_manifest` → edit `agent-scope/tasks/.json`, append the listed patterns to `allowed`, save, then retry the original operation. @@ -272,8 +246,21 @@ The JSON has this shape (TypeScript for clarity): - `cancel` → stop the turn; summarise what got done and what didn't. - `custom` → ask the user in plain chat: "OK, what should I do instead?" Wait for their free-text reply, then carry out whatever they say. -5. **Never invent options.** If none of the presets match and `custom` isn't - present (shouldn't happen — it's in every menu), pick `cancel`. + +5. **Never invent options.** If neither `simpleOptions` entry matches the + user's intent, the `custom_instruction` entry is always there — route + through it. + +### Phrasing rules (for onboarding AND denials) + +- Write like you're texting a coworker, not filing a ticket. +- One short question. One recommendation. One "something else" option. +- Never use ALL CAPS banners ("PROTECTED PATH", "STOP", "WARNING"). +- Don't explain the internal architecture in the prompt — the user doesn't + need to know about hooks or manifests to answer. Keep it action-focused: + "I want to do X. Should I?" +- If you need to say why something is restricted, one sentence is enough. +- No emoji unless the user uses them first. ## What counts as a write @@ -333,6 +320,6 @@ they can commit and push normally. the menu's `switch_task` action, or via `pnpm task set`. - Don't add patterns to a manifest to unblock yourself — always confirm via the plan-mode menu first. -- Don't run `pnpm task create` yourself for a new task — the user runs it, so - the manifest is human-authored and the afterShell backstop doesn't wipe it. +- Don't surface the verbose `options` list in AskQuestion — always use + `simpleOptions` (two entries: recommendation + "something else"). - Don't retry a blocked operation with a different tool or command form. diff --git a/AGENTS.md b/AGENTS.md index 2c8f264ed..e3c01b0fb 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -92,20 +92,24 @@ active: 3. Draft a conservative set of allowed globs. Prefer whole-package globs (`packages//**`). Inherit from `base`. Always append `!**/secrets.*` and `!**/.env*`. -4. Propose the scope to the user as a **two-part question** (use - whatever multi-select + single-select UI your client supports): - - **Q1 (multi-select, "Which packages should be writable?"):** one - option per candidate package labelled - ` files match`, with 2-3 sample paths inline where - helpful. Pre-select the packages you already decided to include. - Include a `None of the above / I'll specify manually` escape - option. - - **Q2 (single-select, "Action?"):** `approve`, `show_json`, - `edit_globs`, `widen`, `narrow`, `cancel`, `custom_instruction`. -5. On `approve` + the Q1 package selection: **run the command yourself** - via your shell tool, then continue with the actual work in the same - turn. The command's `--allowed` flags must match Q1 verbatim (the user - approved a specific scope — altering it is a protocol violation): +4. Propose the scope to the user as **one short question with two + options**. Write it like you're asking a coworker, not filling out a + form. 3 sentences max: one-line rephrase of the task, the scope you'd + propose (3–5 bullet globs), then "Sound good?" Example: + + > Refactor peer sync to use the new workspace auth. I'd scope it to: + > • `packages/agent/**` + > • `packages/core/**` + > + > Sound good? + + Options (only these two, IDs exactly): + - `go` — `"Yes, go with that"` + - `custom_instruction` — `"Tell me what to change"` + +5. On `go`: **run the command yourself** via your shell tool, then + continue with the actual work in the same turn. The command's + `--allowed` flags must match your proposed scope verbatim: ```bash pnpm task create \ @@ -116,8 +120,11 @@ active: --activate ``` - If the command fails, surface the error and re-ask via AskQuestion - instead of retrying blindly. + If the command fails, surface the error and re-ask (still one short + question, two options) — do not retry blindly. + + On `custom_instruction`: ask the user in plain chat what they want + changed, apply it to the draft, then re-ask step 4. > 🛈 **On Cursor / Claude Code the `afterShellExecution` / PostToolUse Bash > hook has a narrow allowlist** so this one invocation can persist the @@ -137,28 +144,44 @@ active: ## Plan-mode denial protocol When a write is denied (whether by a hard hook or by your own self-check), -the denial message contains a fenced JSON block: +the denial message starts with an `agent-scope:` summary line and contains +a fenced JSON block: ``` -{ ... JSON payload with options[] and recommendedOptionId ... } +{ humanSummary, simpleOptions, recommendedOptionId, options, ... } ``` When you see this, STOP. Do not retry, rewrite, or work around the denial. -Surface a structured menu to the user via whatever question/option mechanism -your client supports. Include: - -- The denied path or command. -- **Why it's restricted** — protected? out of task scope? broken manifest? -- **Your reasoning** — 1–2 sentences on why you wanted to touch the file - and what you were trying to accomplish. -- **Your recommendation** — usually the JSON's `recommendedOptionId`. -- The full `options` array verbatim. - -Wait for the user's choice. Match their answer to one of the listed -options. If nothing fits, ask them what they want instead — never invent -an option that wasn't listed. +Ask the user **one short question with the two `simpleOptions` entries +verbatim** — never surface the verbose `options` list: + +- Prompt = `humanSummary` verbatim + one short sentence of your own + reasoning (why you wanted to do this) + a simple ask. Keep the whole + prompt to 3 sentences max. Example: + + > I'd like to edit `packages/evm-module/contracts/S.sol`, but the + > active task `sync` doesn't cover that file. I was trying to update + > the staking integration the PR depends on. Want me to add it and + > continue? + +- Options = `simpleOptions` verbatim (exactly two entries: the + recommendation and "Something else — tell me what"). + +Match the user's answer to the chosen `action.kind` and carry it out. If +they pick `custom_instruction`, ask them in plain chat what they'd like +instead and follow their reply. Never invent options. + +### Phrasing rules (onboarding AND denials) + +- Write like you're texting a coworker. One short question, one + recommendation, one "something else" option. +- No ALL CAPS banners ("PROTECTED PATH", "STOP", "WARNING"). +- Don't explain internal architecture in the prompt. The user doesn't + need to know about hooks or manifests to answer. +- One sentence is enough to say why something is restricted. +- No emoji unless the user uses them first. ## CLI quick reference diff --git a/CLAUDE.md b/CLAUDE.md index cd8918324..8b9eb259d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -267,27 +267,28 @@ protocol below. slightly broad; prefer whole-package globs (`packages//**`) over file-level globs; inherit `base`; always append `!**/secrets.*`, `!**/.env*`. -4. **Propose the scope via a SINGLE `AskQuestion` call with TWO questions.** - - **Q1 — packages (multi-select).** `id: "packages"`, - `allow_multiple: true`, `prompt: "Which packages should be writable - for this task?"` Include a one-line rephrase of the description and - the suggested task id in the prompt. Options: one per candidate - package labelled `" files match"`, with a sample of - relevant paths in the label where helpful. List the recommended - packages first and say so in the prompt. - - **Q2 — action (single-select).** `id: "action"`, - `allow_multiple: false`, `prompt: "Action?"`. Options (IDs must - match exactly): `approve`, `show_json`, `edit_globs`, `widen`, - `narrow`, `cancel`, `custom_instruction`. Recommend `approve` in the - prompt. -5. **On `approve`** + the Q1 package selection, **YOU (the agent) run the - command yourself** via the Shell tool. The `afterShellExecution` hook - has a narrow allowlist for exactly this invocation: a canonical - `pnpm task create ...` or +4. **Propose the scope via a SINGLE `AskQuestion` — one question, two + options.** Write it like you're asking a coworker, not filling out a + form. 3 sentences max: one-line rephrase of the task, the scope you'd + propose (3–5 bullet globs), then "Sound good?" Example: + + > Refactor peer sync to use the new workspace auth. I'd scope it to: + > • `packages/agent/**` + > • `packages/core/**` + > + > Sound good? + + Options (IDs must match exactly — only these two): + - `go` — `"Yes, go with that"` + - `custom_instruction` — `"Tell me what to change"` + +5. **On `go`**, **YOU (the agent) run the command yourself** via the + Shell tool. The `afterShellExecution` hook has a narrow allowlist for + exactly this invocation: a canonical `pnpm task create ...` or `node agent-scope/bin/task.mjs create ...` is the ONLY shape that may persist new files under `agent-scope/tasks/` and `agent-scope/active`. - The command must match Q1's package selection verbatim (any deviation - is a protocol violation — the user approved a specific scope): + The command's `--allowed` flags must match your proposed scope verbatim + (any deviation is a protocol violation): ```bash pnpm task create \ @@ -298,32 +299,23 @@ protocol below. --activate ``` - After the command succeeds (exit 0, manifest shown), continue with the - actual work in the same turn. Do NOT bounce the command back to the - user — they already approved it via the AskQuestion. If the command - fails for any reason (schema error, collision, etc.), surface the - error and re-ask via AskQuestion instead of retrying blindly. -6. **On `show_json`**, print the drafted manifest, then re-ask both - questions. -7. **On `edit_globs` / `widen` / `narrow`**, ask one targeted follow-up in - chat, update the draft, then re-ask both questions. -8. **On `cancel`**, acknowledge and keep working without a task. -9. **On `custom_instruction`**, ask in plain chat what they want instead. + After the command succeeds, continue with the actual work in the same + turn. If the command fails, surface the error and re-ask via + AskQuestion instead of retrying blindly. -### Plan-mode denial protocol (runs for every agent-scope denial) +6. **On `custom_instruction`**, ask the user in plain chat what they want + changed (packages, globs, task id, whatever), apply it to the draft, + then re-ask step 4 — still one short question with two options. -When any of these happen, stop and surface a menu. Do NOT retry, rewrite, or -work around the denial — the defense-in-depth layers revert tracked changes -and delete untracked files in denied paths anyway: +### Plan-mode denial protocol (runs for every agent-scope denial) -- `preToolUse` returned `{ permission: "deny" }` with `OUT OF TASK SCOPE` or - `PROTECTED PATH` in the message. -- `beforeShellExecution` returned `{ permission: "deny" }` with - `Destructive shell command blocked` in the message. -- `afterShellExecution` returned `additional_context` starting with - `agent-scope: shell command modified`. +When agent-scope blocks a write or reverts a shell command, stop and +surface a short menu. Do NOT retry, rewrite, or work around the denial — +the defense-in-depth layers revert tracked changes and delete untracked +files in denied paths anyway. -Every such message contains a fenced JSON block: +Every denial message starts with `agent-scope:` prose and contains a +fenced JSON block: ``` @@ -331,29 +323,39 @@ Every such message contains a fenced JSON block: ``` -The JSON has `options[]` and `recommendedOptionId`. It also has a placeholder -`agentReasoning: null` — you fill this in by including your reasoning in the -AskQuestion prompt (see below). +The JSON shape (key fields only): + +```ts +{ + humanSummary: string, // QUOTE this in your AskQuestion prompt + simpleOptions: [ // exactly two entries — SURFACE these + { id, label, action }, // the recommended option + { id: "custom_instruction", // free-text fallback (always present) + label: "Something else — tell me what", + action: { kind: "custom" } } + ], + recommendedOptionId: string, // matches simpleOptions[0].id + reason: "out-of-scope" | "protected" | "manifest-load-error" | "unknown", + deniedPath?, command?, activeTask, options: [ /* verbose — do NOT surface */ ], +} +``` **Protocol:** 1. **Stop.** Do not retry via another tool or command form. 2. **Extract the JSON.** Parse between the fences. -3. **Call `AskQuestion`** with ONE question whose prompt **must include**: - - The denied path / command. - - **Why it's restricted** — for `reason: "protected"` denials, summarise - the `Why this file is guarded` prose block (use `protectedRole` / - `protectedKind` from the structured JSON for a concrete label). For - `reason: "out-of-scope"` denials, state that the active task's manifest - does not list this path. - - **Your reasoning in 1–2 sentences** — why you wanted to touch this file, - what you were trying to accomplish. This is the "here's what I was - thinking" that the user needs to make an informed decision. - - **Your recommendation** — lead with the JSON's `recommendedOptionId` - unless you have a concrete reason to override it. - - The full `options` array, verbatim — use each entry's `id`/`label`. For - protected denials the labels are pre-phrased as Yes / No / No-but-skip - / custom so the prompt reads as a plain yes/no question. +3. **Call `AskQuestion`** — ONE question, the TWO `simpleOptions` entries + verbatim. Prompt = `humanSummary` verbatim, plus one short sentence of + your own reasoning (why you wanted to do this), plus a simple ask. + Keep the whole prompt to 3 sentences max. Example: + + > I'd like to edit `packages/evm-module/contracts/S.sol`, but the + > active task `sync` doesn't cover that file. I was trying to update + > the staking integration the PR depends on. Want me to add it and + > continue? + + Do NOT surface the verbose `options` list. Do NOT add or rewrite options. + 4. **Act on the user's choice** by matching the `action.kind`: - `add_to_manifest` → edit `agent-scope/tasks/.json`, append patterns to `allowed`, retry. @@ -364,10 +366,21 @@ AskQuestion prompt (see below). - `clear_task` → `pnpm task clear`. - `skip` → acknowledge, move on. - `cancel` → stop the turn, summarise. - - `custom` → ask the user in plain chat "what should I do instead?", do - what they say. -5. **Never invent options.** If nothing fits and no `custom` is listed (it - always is), pick `cancel`. + - `custom` → ask the user in plain chat "OK, what should I do instead?" + Wait for their free-text reply, then carry out whatever they say. + +5. **Never invent options.** The `custom_instruction` entry is always in + `simpleOptions` — route through it when neither side fits. + +### Phrasing rules (onboarding AND denials) + +- Write like you're texting a coworker. One short question. One + recommendation. One "something else" option. +- Never use ALL CAPS banners ("PROTECTED PATH", "STOP", "WARNING"). +- Don't explain internal architecture in the prompt. The user doesn't + need to know about hooks or manifests to answer. +- One sentence is enough to say why something is restricted. +- No emoji unless the user uses them first. ### CLI quick reference diff --git a/agent-scope/README.md b/agent-scope/README.md index c49d1002c..9beaf4eab 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -298,20 +298,20 @@ The agent then follows a fixed protocol (defined in 1. Reads your description from the marker (does NOT ask you again). 2. Explores the codebase (Grep / Glob / SemanticSearch / DKG) to find relevant files. Counts matching files per candidate package. -3. Proposes a scope via a **two-part `AskQuestion`**: - - **Q1 — packages (multi-select):** pick which packages should be - writable. Each option shows the package path, file-match count, and - (where helpful) 2-3 sample file paths. The packages the agent - already decided to include are pre-selected. - - **Q2 — action (single-select):** `approve`, `show_json`, - `edit_globs`, `widen`, `narrow`, `cancel`, `custom_instruction`. -4. On `approve`, the agent itself runs `pnpm task create ...` via - the shell tool. The `afterShellExecution` / PostToolUse-Bash hooks +3. Proposes a scope via a **single short `AskQuestion`** — one question, + two options. The prompt is one-line rephrase of the task + the scope + as 3–5 bullet globs + "Sound good?" The options are: + - `go` — "Yes, go with that" + - `custom_instruction` — "Tell me what to change" +4. On `go`, the agent itself runs `pnpm task create ...` via the + shell tool. The `afterShellExecution` / PostToolUse-Bash hooks recognise the canonical task-create invocation and allow its two specific writes (`agent-scope/tasks/.json` and `agent-scope/active`) to persist; every other write to those paths is still reverted. See the "approved-task-create allowlist" section for details. -5. The agent starts the real work in the same turn. +5. On `custom_instruction`, the agent asks in plain chat what you'd like + changed, updates the draft, and re-asks step 3. +6. Once approved, the agent starts the real work in the same turn. From here, every attempted write to an out-of-scope file triggers a plan-mode AskQuestion menu — see **Escalation** below. @@ -457,10 +457,11 @@ Forward denials to a DKG node / Slack / log aggregator by setting ## Escalation — plan-mode denial menu Every denial (preToolUse, beforeShellExecution, afterShellExecution) emits both -a human-readable prose block **and** a machine-readable JSON menu embedded in -the hook's response. Agents following `.cursor/rules/agent-scope.mdc` (and -`CLAUDE.md`) must parse the menu and surface it to the user via the same -`AskQuestion` mechanism Cursor uses for plan mode. +a short human summary **and** a machine-readable JSON menu embedded in the +hook's response. Agents following `.cursor/rules/agent-scope.mdc`, +`CLAUDE.md`, or `AGENTS.md` must parse the menu and surface it via their +client's plan-mode-style question mechanism (`AskQuestion` in Cursor) — +**one question, two options**. The structured block is fenced by HTML comments so it's trivial to locate: @@ -470,17 +471,17 @@ The structured block is fenced by HTML comments so it's trivial to locate: "version": 1, "hook": "preToolUse", "reason": "out-of-scope", + "humanSummary": "I'd like to edit `packages/evm-module/contracts/Staking.sol`, but the active task `sync-refactor` doesn't cover that file.", "deniedPath": "packages/evm-module/contracts/Staking.sol", "activeTask": "sync-refactor", "suggestedGlob": "packages/evm-module/contracts/**", "alternativeTasks": [ { "id": "staking", "description": "..." } ], + "simpleOptions": [ + { "id": "add_glob", "label": "Add this folder to the task and try again", "action": { "kind": "add_to_manifest", ... } }, + { "id": "custom_instruction", "label": "Something else — tell me what", "action": { "kind": "custom" } } + ], "options": [ - { "id": "add_file", "label": "...", "action": { "kind": "add_to_manifest", ... } }, - { "id": "add_glob", "label": "...", "action": { "kind": "add_to_manifest", ... } }, - { "id": "switch_task_staking","label": "...", "action": { "kind": "switch_task", "task": "staking" } }, - { "id": "skip", "label": "...", "action": { "kind": "skip" } }, - { "id": "cancel", "label": "...", "action": { "kind": "cancel" } }, - { "id": "custom_instruction", "label": "Let me type my own instruction", "action": { "kind": "custom" } } + /* full verbose list — audit/back-compat only, NOT surfaced to users */ ], "recommendedOptionId": "add_glob", "agentReasoning": null @@ -503,14 +504,25 @@ Possible `action.kind` values: Extra guidance in the block: -- `recommendedOptionId` is a hint for which option to highlight. It's chosen - conservatively (`add_glob` for out-of-scope, `cancel` for protected, - `fix_manifest` for manifest-load errors). The agent is told to lead with it - unless overriding has a concrete reason. -- `agentReasoning: null` is a placeholder. The hook can't know the agent's - reasoning, so the agent **fills it in when surfacing the menu via - `AskQuestion`**: the prompt must include a 1–2 sentence "here's what I was - trying to do and why this file came up". Plan-mode equivalent. +- `humanSummary` is the one-line natural-language framing of the situation. + The agent is told to **quote this verbatim** in the AskQuestion prompt and + add one short sentence of their own reasoning (why they wanted to do it). + Keep the whole prompt to 3 sentences max. +- `simpleOptions` always has **exactly two entries**: the LLM-recommended + action (short human label like "Add this folder to the task and try + again", "Skip it", "Yes, unlock it so I can do this edit") and a + free-text fallback `custom_instruction` → `"Something else — tell me + what"`. Agents surface these two options and **never** surface the + verbose `options` list. +- `options` is the verbose, audit-grade list (add_file, add_glob, switch + tasks, skip, cancel, bootstrap, fix_manifest, clear_task, custom). It is + preserved for back-compat, tests, and anyone inspecting the JSON + directly — but not intended for end-user display. +- `recommendedOptionId` is the id of `simpleOptions[0]`. Chosen + conservatively: `add_glob` for out-of-scope, `cancel` for protected, + `fix_manifest` for manifest-load errors. +- `agentReasoning: null` is a placeholder the agent overwrites when + quoting it in their prompt. Heuristics (in `agent-scope/lib/denial.mjs`): @@ -518,14 +530,15 @@ Heuristics (in `agent-scope/lib/denial.mjs`): (`dirname/**`). - `alternativeTasks` lists up to 3 other manifests that already cover the denied path. -- `protected` reasons offer only `bootstrap` / `skip` / `cancel` / - `custom_instruction` — no other option can legitimately unblock the write. +- Protected denials recommend `cancel` by default — the user must + explicitly opt into `bootstrap` via the "something else" free-text + fallback if they want to unlock the system. Builders and tests live alongside the scope library: ``` agent-scope/lib/denial.mjs # the builders -agent-scope/lib/denial.test.mjs # 33 unit tests +agent-scope/lib/denial.test.mjs # 40 unit tests ``` No special tokens or APIs — the manifest is the source of truth; edit it to diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index 71acb97bb..5c1a585ea 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -266,9 +266,9 @@ async function startSmart() { const prompter = createPrompter(); let description = ''; try { - console.log('Describe the task — what to build or fix, which packages / behaviours'); - console.log('/ tests, and any files you already know about.'); - console.log('Press Enter to submit. (Multi-line pastes are captured in full.)'); + console.log('What are you working on?'); + console.log('(One or two sentences is plenty. Paste longer briefs if you have them.)'); + console.log('Press Enter to send.'); console.log(''); description = await prompter.askPasteableDescription('> '); } finally { diff --git a/agent-scope/lib/denial.mjs b/agent-scope/lib/denial.mjs index 40d50bfdf..30f6b69fa 100644 --- a/agent-scope/lib/denial.mjs +++ b/agent-scope/lib/denial.mjs @@ -1,8 +1,14 @@ // Builds structured denial payloads for every agent-scope enforcement layer. -// Each denial carries both a human-readable prose block AND a machine-readable -// JSON block delimited by the `agent-scope-menu` fence. Agents are instructed -// (via CLAUDE.md + .cursor/rules/agent-scope.mdc) to parse the JSON and surface -// the `options` array via AskQuestion — the plan-mode equivalent for denials. +// Each denial carries a short human-readable summary AND a machine-readable +// JSON block delimited by the `agent-scope-menu` fence. +// +// Agents are instructed (via CLAUDE.md + .cursor/rules/agent-scope.mdc + +// AGENTS.md) to: +// 1. Quote `humanSummary` in their AskQuestion prompt (keep it short and +// natural — like a chat message to a coworker). +// 2. Offer only the two entries in `simpleOptions` — the LLM-recommended +// action plus a free-text fallback. Never surface the full `options` +// list to the user; it exists for audit / back-compat / tests. // // Zero IO, zero deps. Pure functions; unit-testable. @@ -74,6 +80,48 @@ const CUSTOM_OPTION = { action: { kind: 'custom' }, }; +// Shorter version used in the two-option `simpleOptions` surface — this is +// the label the user sees in the plan-mode AskQuestion, so it should read +// like a chat button, not a legal clause. +const CUSTOM_OPTION_SIMPLE = { + id: 'custom_instruction', + label: 'Something else — tell me what', + action: { kind: 'custom' }, +}; + +// Short, natural-language label for the recommended action. The full +// `options` array keeps its verbose labels (back-compat + audit), but the +// plan-mode AskQuestion uses these casual ones so the prompt reads like a +// human wrote it. Falls back to the verbose label if the id is unknown. +function simpleLabelFor(optionId, { deniedPath, activeTaskId, altTaskId } = {}) { + if (optionId === 'add_file') return 'Add this file to the task and try again'; + if (optionId === 'add_glob') return 'Add this folder to the task and try again'; + if (optionId === 'bootstrap') return 'Yes, unlock it so I can do this edit'; + if (optionId === 'cancel') return 'Skip it'; + if (optionId === 'skip') return 'Skip and keep working on other things'; + if (optionId === 'fix_manifest') return 'Open the task file so I can fix it'; + if (optionId === 'clear_task') return 'Clear the active task for now'; + if (optionId === 'acknowledge') return 'OK, keep going'; + if (optionId && optionId.startsWith('switch_task_') && altTaskId) { + return `Switch to task "${altTaskId}" and try again`; + } + return null; +} + +// Build the two-option `simpleOptions` array for plan-mode AskQuestion. +// It always contains exactly two entries: the recommended option (with a +// short human label) and a free-text fallback. +function buildSimpleOptions(fullOptions, recommendedId) { + const rec = fullOptions.find(o => o.id === recommendedId) || fullOptions[0]; + if (!rec) return [CUSTOM_OPTION_SIMPLE]; + const altTaskId = rec.id.startsWith('switch_task_') ? rec.id.slice('switch_task_'.length) : null; + const label = simpleLabelFor(rec.id, { altTaskId }) || rec.label; + return [ + { id: rec.id, label, action: rec.action }, + CUSTOM_OPTION_SIMPLE, + ]; +} + // Menu for out-of-scope write denials (path is in the repo but not in scope). export function buildOutOfScopeOptions({ deniedPath, activeTaskId, alternatives }) { const opts = [ @@ -209,13 +257,14 @@ function wrapStructured(payload) { ].join('\n'); } -// Emit the human-readable prose and append the machine-readable JSON block. -// Agents are expected to find the fence and call AskQuestion with `options`. -function render(prose, structured) { +// Emit a short human-readable summary and append the machine-readable JSON +// block. Agents are instructed to quote `humanSummary` verbatim in their +// AskQuestion prompt and offer only the two `simpleOptions` — never the +// full `options` list. +function render(summary, structured) { return [ - prose.trim(), + `agent-scope: ${summary}`, '', - '⇣ Plan-mode menu (agent: surface these options via AskQuestion):', wrapStructured(structured), ].join('\n'); } @@ -227,6 +276,11 @@ export function buildPreToolUseDenial({ if (decision === 'protected') { const classification = classifyProtected(deniedPath); const options = buildProtectedOptions({ deniedPath }); + const recommendedOptionId = recommendFor('protected', options); + const humanSummary = + `I'd like to edit \`${deniedPath}\`, but it's ${classification.role}. ` + + `It's locked on purpose so an agent can't silently reshape its own guardrails — ` + + `unlocking needs your OK.`; const structured = { version: 1, hook: 'preToolUse', @@ -237,42 +291,26 @@ export function buildPreToolUseDenial({ protectedRole: classification.role, activeTask: taskId || null, protectedPatterns: [...PROTECTED_PATTERNS], + humanSummary, options, - recommendedOptionId: recommendFor('protected', options), + simpleOptions: buildSimpleOptions(options, recommendedOptionId), + recommendedOptionId, agentReasoning: null, }; - const prose = [ - `PROTECTED PATH — ${tool} on "${deniedPath}" was blocked by agent-scope.`, - ``, - `Why this file is guarded:`, - ` ${deniedPath} is ${classification.role}.`, - ` Letting an agent edit it would let the agent disable or reshape its own`, - ` enforcement. That's why it's always denied until a human explicitly`, - ` opts in — even when no task is active.`, - ``, - `What happens if the user says YES (enable bootstrap):`, - ` The human runs \`touch agent-scope/.bootstrap-token\` in their own`, - ` terminal. For as long as that file exists, ALL protection is disabled`, - ` (both protected paths and task-scope checks). After the edit is done,`, - ` they run \`rm agent-scope/.bootstrap-token\` to re-lock the system.`, - ``, - `What happens if the user says NO:`, - ` The edit is cancelled (or skipped). No hidden retries — the other`, - ` hooks would revert it anyway.`, - ``, - `Agent: surface the menu below via AskQuestion. Include a 1–2 sentence`, - `explanation of WHY you wanted to touch this file (your reasoning) and`, - `lead with the recommended option unless you have concrete grounds to`, - `override it.`, - ].join('\n'); - return { message: render(prose, structured), structured }; + return { message: render(humanSummary, structured), structured }; } // out-of-scope (deny) const alternatives = findAlternativeTasks(deniedPath, root, taskId); const options = buildOutOfScopeOptions({ deniedPath, activeTaskId: taskId, alternatives }); + const recommendedOptionId = recommendFor('out-of-scope', options); const positives = ((task && task.allowed) || []).filter(p => !p.startsWith('!')); const exemptions = ((task && task.exemptions) || []).filter(p => !p.startsWith('!')); + const humanSummary = + `I'd like to edit \`${deniedPath}\`, but the active task ` + + `${taskId ? `\`${taskId}\`` : '(none)'}` + + `${task && task.description ? ` — ${task.description}` : ''}` + + ` doesn't cover that file.`; const structured = { version: 1, hook: 'preToolUse', @@ -286,45 +324,35 @@ export function buildPreToolUseDenial({ suggestedGlob: suggestGlob(deniedPath), suggestedTightGlob: suggestTightGlob(deniedPath), alternativeTasks: alternatives, + humanSummary, options, - recommendedOptionId: recommendFor('out-of-scope', options), + simpleOptions: buildSimpleOptions(options, recommendedOptionId), + recommendedOptionId, agentReasoning: null, }; - const prose = [ - `OUT OF TASK SCOPE — ${tool} blocked by agent-scope.`, - ` Active task: ${taskId}${task && task.description ? ` — ${task.description}` : ''}`, - ` Denied path: ${deniedPath}`, - ``, - `This task only permits writes matching:`, - ...(positives.length ? positives.map(p => ` - ${p}`) : [' (nothing — manifest has no positive allows)']), - ...(exemptions.length ? ['', 'Plus always-allowed exemptions:', ...exemptions.map(p => ` - ${p}`)] : []), - ``, - `STOP. Do not retry via another tool or a different command form. Use the`, - `plan-mode menu below to ask the user how to proceed.`, - ].join('\n'); - return { message: render(prose, structured), structured }; + return { message: render(humanSummary, structured), structured }; } // Build a manifest-load-error denial message. export function buildLoadErrorDenial({ taskId, error }) { const options = buildLoadErrorOptions({ taskId, error }); + const recommendedOptionId = recommendFor('manifest-load-error', options); + const humanSummary = + `The active task manifest \`${taskId}\` won't load — ${error}. ` + + `I can't apply any scope check until it's fixed or cleared.`; const structured = { version: 1, hook: 'preToolUse', reason: 'manifest-load-error', activeTask: taskId, error, + humanSummary, options, - recommendedOptionId: recommendFor('manifest-load-error', options), + simpleOptions: buildSimpleOptions(options, recommendedOptionId), + recommendedOptionId, agentReasoning: null, }; - const prose = [ - `agent-scope: failed to load active task manifest "${taskId}".`, - ` Error: ${error}`, - ``, - `Fix agent-scope/tasks/${taskId}.json or clear the active task.`, - ].join('\n'); - return { message: render(prose, structured), structured }; + return { message: render(humanSummary, structured), structured }; } // Build a beforeShellExecution denial message from a set of violations. @@ -361,6 +389,18 @@ export function buildShellPrecheckDenial({ suggestedFix = null; } + const recommendedOptionId = recommendFor(reason, options); + const firstPath = firstProtPath || firstScopePath || '(target)'; + const firstCmd = violations[0]?.cmd || 'command'; + const humanSummary = + reason === 'protected' + ? `The shell command I was about to run (\`${firstCmd}\` on \`${firstPath}\`) ` + + `would touch a protected system file. Blocked before it ran.` + : reason === 'out-of-scope' + ? `The shell command I was about to run (\`${firstCmd}\` on \`${firstPath}\`) ` + + `would write outside the active task \`${taskId || '(none)'}\`. Blocked before it ran.` + : `That shell command was blocked before it ran.`; + const structured = { version: 1, hook: 'beforeShellExecution', @@ -371,23 +411,14 @@ export function buildShellPrecheckDenial({ cmd: v.cmd, path: v.path, decision: v.decision, })), suggestedFix, + humanSummary, options, - recommendedOptionId: recommendFor(reason, options), + simpleOptions: buildSimpleOptions(options, recommendedOptionId), + recommendedOptionId, agentReasoning: null, }; - const prose = [ - `Destructive shell command blocked by agent-scope pre-shell guard.`, - ` Active task: ${task ? task.id : '(none — only system protection applies)'}`, - ``, - `Violations:`, - ...violations.map(v => ` - ${v.cmd} ${v.path} [${v.decision}]`), - ``, - `STOP. The post-exec backstop would revert tracked files and delete`, - `untracked ones in denied paths anyway; use the menu below instead of`, - `retrying with a different command form.`, - ].join('\n'); - return { message: render(prose, structured), structured }; + return { message: render(humanSummary, structured), structured }; } // Build an afterShellExecution context message. Unlike the other two this @@ -429,6 +460,25 @@ export function buildAfterShellContext({ ]; } + const recommendedOptionId = recommendFor(reason, options); + const touchedCount = reverted.length + deleted.length; + const humanSummary = (() => { + if (touchedCount === 0) { + return `A shell command ran and finished cleanly — nothing needed to be reverted.`; + } + const bits = []; + if (reverted.length) bits.push(`reverted ${reverted.length} file${reverted.length === 1 ? '' : 's'}`); + if (deleted.length) bits.push(`deleted ${deleted.length} new file${deleted.length === 1 ? '' : 's'}`); + const fix = bits.join(' and '); + if (reason === 'protected') { + return `A shell command touched a protected system file, so I ${fix} to put things back.`; + } + if (reason === 'out-of-scope') { + return `A shell command touched files outside the active task \`${taskId}\`, so I ${fix} to put things back.`; + } + return `A shell command touched files it shouldn't have, so I ${fix}.`; + })(); + const structured = { version: 1, hook: 'afterShellExecution', @@ -438,25 +488,26 @@ export function buildAfterShellContext({ reverted, deleted, unreverted: unreverted.map(u => ({ path: u.path, status: u.status, reason: u.reason })), + humanSummary, options, - recommendedOptionId: recommendFor(reason, options), + simpleOptions: buildSimpleOptions(options, recommendedOptionId), + recommendedOptionId, agentReasoning: null, }; - const lines = [ - `agent-scope: shell command modified out-of-task or protected files` + - (task ? ` (task: ${task.id}).` : ' (no active task — only protected paths enforced).'), - ]; + // Prose stays minimal: the humanSummary + paths the agent may want to + // reference. No banners, no STOP, no agent-directed meta copy. + const lines = [humanSummary]; if (reverted.length) { - lines.push('', 'Reverted via `git checkout --`:'); + lines.push('', 'Reverted:'); for (const p of reverted) lines.push(` - ${p}`); } if (deleted.length) { - lines.push('', 'Deleted (untracked, not allowed to persist):'); + lines.push('', 'Deleted:'); for (const p of deleted) lines.push(` - ${p}`); } if (unreverted.length) { - lines.push('', 'Could NOT revert (please review manually):'); + lines.push('', 'Could not revert (please review):'); for (const u of unreverted) lines.push(` - ${u.path} [${u.status}] ${u.reason}`); } diff --git a/agent-scope/lib/denial.test.mjs b/agent-scope/lib/denial.test.mjs index cdbc20e6f..84bf0045b 100644 --- a/agent-scope/lib/denial.test.mjs +++ b/agent-scope/lib/denial.test.mjs @@ -271,11 +271,19 @@ test('buildPreToolUseDenial: protected → structured protected menu', () => { assert.equal(parsed.recommendedOptionId, 'cancel'); assert.equal(parsed.agentReasoning, null, 'agent fills this in when surfacing'); assert.equal(structured.reason, 'protected'); - assert.ok(message.includes('PROTECTED PATH')); - // Prose now explains WHY this specific file is guarded + the yes/no flow. - assert.ok(message.includes('Why this file is guarded'), 'prose has Why block'); - assert.ok(message.includes('What happens if the user says YES'), 'prose has YES block'); - assert.ok(message.includes('What happens if the user says NO'), 'prose has NO block'); + // Human summary is short, natural, contains the denied path, and is + // surfaced in the rendered prose so the agent can quote it verbatim. + assert.ok(typeof parsed.humanSummary === 'string'); + assert.ok(parsed.humanSummary.length > 0 && parsed.humanSummary.length < 400, + 'humanSummary stays concise'); + assert.ok(parsed.humanSummary.includes('.cursor/hooks/x.mjs')); + assert.ok(message.includes(parsed.humanSummary), + 'rendered prose includes the humanSummary verbatim'); + // No more ALL-CAPS banners or agent-directed meta copy in the prose. + assert.ok(!message.includes('PROTECTED PATH'), 'prose is banner-free'); + assert.ok(!message.includes('STOP'), 'prose is banner-free'); + assert.ok(!/surface the menu below/i.test(message), + 'prose has no "surface the menu" agent-directed copy'); // Structured payload carries the classification so downstream tools can use it. assert.equal(parsed.protectedKind, 'cursor-hook'); assert.ok(typeof parsed.protectedRole === 'string' && parsed.protectedRole.length > 0); @@ -307,7 +315,12 @@ test('buildPreToolUseDenial: out-of-scope → full metadata + alternatives', () assert.ok(ids.includes('custom_instruction')); assert.equal(p.recommendedOptionId, 'add_glob'); assert.equal(p.agentReasoning, null); - assert.ok(message.includes('OUT OF TASK SCOPE')); + // Human-sounding summary instead of the old ALL-CAPS banner. + assert.ok(!message.includes('OUT OF TASK SCOPE'), + 'prose no longer uses the ALL-CAPS banner'); + assert.ok(typeof p.humanSummary === 'string' && p.humanSummary.length < 400); + assert.ok(p.humanSummary.includes('packages/evm-module/contracts/S.sol')); + assert.ok(p.humanSummary.includes('sync'), 'summary mentions the active task'); } finally { cleanup(root); } }); @@ -428,8 +441,8 @@ test('buildAfterShellContext: reverted + deleted in message', () => { deleted: ['.cursor/hooks/bad.mjs'], unreverted: [], }); - assert.ok(message.includes('Reverted via')); - assert.ok(message.includes('Deleted (untracked')); + assert.ok(message.includes('Reverted:')); + assert.ok(message.includes('Deleted:')); assert.ok(message.includes('packages/other/x.ts')); assert.ok(message.includes('.cursor/hooks/bad.mjs')); const p = extractJson(message); @@ -535,6 +548,103 @@ test('every denial builder sets recommendedOptionId to a valid option', () => { } finally { cleanup(root); } }); +// --------------------------------------------------------------------------- +// simpleOptions — the two-option plan-mode surface +// --------------------------------------------------------------------------- + +test('simpleOptions: exactly two entries (recommended + custom) on every builder', () => { + const root = makeTempRepo(); + try { + const cases = [ + buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', + task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), + buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', + decision: 'protected', task: null, taskId: null, root }), + buildLoadErrorDenial({ taskId: 't', error: 'bad' }), + buildShellPrecheckDenial({ command: 'rm x', + violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], + task: { id: 't' }, taskId: 't', root }), + buildAfterShellContext({ command: 'x', + task: { id: 't' }, taskId: 't', root, + reverted: ['a.ts'], deleted: [], unreverted: [] }), + ]; + for (const { message } of cases) { + const p = extractJson(message); + assert.ok(Array.isArray(p.simpleOptions), 'simpleOptions is an array'); + assert.equal(p.simpleOptions.length, 2, + 'simpleOptions always has exactly two entries (recommended + custom)'); + const [rec, custom] = p.simpleOptions; + assert.equal(rec.id, p.recommendedOptionId, + 'first simple option matches recommendedOptionId'); + assert.equal(custom.id, 'custom_instruction', + 'second simple option is the custom free-text fallback'); + assert.equal(custom.action.kind, 'custom'); + for (const opt of p.simpleOptions) { + assert.ok(typeof opt.id === 'string' && opt.id.length); + assert.ok(typeof opt.label === 'string' && opt.label.length); + assert.ok(opt.action && typeof opt.action.kind === 'string'); + } + } + } finally { cleanup(root); } +}); + +test('simpleOptions: recommended labels are short and natural', () => { + const root = makeTempRepo(); + try { + // out-of-scope → recommended is add_glob → "Add this folder..." + const { message: m1 } = buildPreToolUseDenial({ tool: 'Write', + deniedPath: 'packages/foo/bar.ts', decision: 'deny', + task: { id: 't', allowed: ['other/**'] }, taskId: 't', root }); + const p1 = extractJson(m1); + assert.equal(p1.simpleOptions[0].label, 'Add this folder to the task and try again'); + + // protected → recommended is cancel → "Skip it" + const { message: m2 } = buildPreToolUseDenial({ tool: 'Write', + deniedPath: '.cursor/hooks/x.mjs', decision: 'protected', + task: null, taskId: null, root }); + const p2 = extractJson(m2); + assert.equal(p2.simpleOptions[0].label, 'Skip it'); + + // custom label is the natural one too + assert.equal(p2.simpleOptions[1].label, 'Something else — tell me what'); + } finally { cleanup(root); } +}); + +// --------------------------------------------------------------------------- +// humanSummary — short, natural, quotable by the agent +// --------------------------------------------------------------------------- + +test('humanSummary: present, short, no banners, no agent-directed meta copy', () => { + const root = makeTempRepo(); + try { + const cases = [ + buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', + task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), + buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', + decision: 'protected', task: null, taskId: null, root }), + buildLoadErrorDenial({ taskId: 't', error: 'bad' }), + buildShellPrecheckDenial({ command: 'rm x', + violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], + task: { id: 't' }, taskId: 't', root }), + buildAfterShellContext({ command: 'x', + task: { id: 't' }, taskId: 't', root, + reverted: ['a.ts'], deleted: [], unreverted: [] }), + ]; + for (const { message } of cases) { + const p = extractJson(message); + assert.ok(typeof p.humanSummary === 'string' && p.humanSummary.length > 0); + assert.ok(p.humanSummary.length <= 400, + `humanSummary is concise (<= 400 chars): "${p.humanSummary}"`); + // No ALL-CAPS banners. + assert.ok(!/PROTECTED PATH|OUT OF TASK SCOPE|STOP\b/.test(p.humanSummary), + 'humanSummary has no ALL-CAPS banners'); + // No agent-directed meta copy. + assert.ok(!/surface .* menu|via AskQuestion/i.test(p.humanSummary), + 'humanSummary is not agent-directed meta copy'); + } + } finally { cleanup(root); } +}); + test('custom_instruction option appears in every denial menu', () => { const root = makeTempRepo(); try { diff --git a/agent-scope/lib/onboarding.mjs b/agent-scope/lib/onboarding.mjs index 9b521e3fe..2e4033c91 100644 --- a/agent-scope/lib/onboarding.mjs +++ b/agent-scope/lib/onboarding.mjs @@ -63,7 +63,7 @@ export function buildOnboardingTrigger({ description = '' } = {}) { '', hasDesc ? 'The user ran `pnpm task start --smart` and has already provided their task description below. DO NOT ask them to describe it again — use the description as your brief.' - : 'The user ran `pnpm task start --smart` but did not include a description. Ask them to describe the task in detail before proceeding.', + : 'The user ran `pnpm task start --smart` but did not include a description. Ask them to describe the task in one short chat message before proceeding.', ...descBlock, 'Smart onboarding protocol — follow EXACTLY (full text in CLAUDE.md,', '.cursor/rules/agent-scope.mdc, AGENTS.md, GEMINI.md):', @@ -72,7 +72,7 @@ export function buildOnboardingTrigger({ description = '' } = {}) { ' 2. Delete `agent-scope/.pending-onboarding` if it still exists.', hasDesc ? ' 3. Read the description above. Do not ask the user to describe it.' - : ' 3. Ask the user in plain chat to describe the task in detail; wait for reply.', + : ' 3. Ask in plain chat: "What are you working on?" Wait for reply.', ' 4. Explore the codebase — Glob, Grep, Read, SemanticSearch, DKG —', ' to find the files the task will touch.', ' 5. Draft a conservative set of allowed globs:', @@ -81,38 +81,28 @@ export function buildOnboardingTrigger({ description = '' } = {}) { ' - prefer whole-package globs (packages//**) over files', ' when in doubt — over-scoping is safe, under-scoping causes', ' constant denials mid-work.', - ' 6. Propose the scope via a SINGLE `AskQuestion` call with TWO questions:', + ' 6. Propose the scope via a SINGLE `AskQuestion` — ONE question, TWO', + ' options. Write it like you are asking a coworker. Three', + ' sentences max: one-line rephrase of the task, the scope you', + ' propose as 3–5 bullet globs, then "Sound good?" Options (ids', + ' exactly):', + ' - go — "Yes, go with that"', + ' - custom_instruction — "Tell me what to change"', '', - ' Q1 (allow_multiple = true):', - ' prompt: "Which packages should be writable for this task?"', - ' options: one entry per candidate package, labelled', - ' " files match description keywords"', - ' plus a sample of 2-3 relevant paths where helpful.', - ' pre-check: the packages you already decided to include.', + ' 7. On `go`: YOU run `pnpm task create --description "..." \\', + ' --allowed "..." --inherits base --activate` via the Shell tool.', + ' The afterShellExecution / PostToolUse-Bash hook has a narrow', + ' allowlist for this exact shape, so the manifest + active', + ' marker persist. The `--allowed` flags MUST match the globs you', + ' proposed verbatim. After success, continue with the real work', + ' in the same turn.', '', - ' Q2 (single-select):', - ' prompt: "Action?"', - ' options:', - ' - approve — "Create + activate this scope"', - ' - show_json — "Show the full manifest JSON first"', - ' - edit_globs — "Let me hand-edit the allowed/deny globs"', - ' - widen — "Let me add another package/file"', - ' - narrow — "Let me remove a package/file"', - ' - cancel — "Abort, no task"', - ' - custom_instruction — "Let me type my own instruction"', + ' 8. On `custom_instruction`: ask the user in plain chat what they', + ' want changed (packages, globs, task id, whatever). Apply it to', + ' the draft and re-ask step 6 — still ONE question, TWO options.', '', - ' 7. On `approve` (Q2) with the Q1 selection:', - ' Print a fenced bash block with the EXACT `pnpm task create ...', - ' --activate` command. Do NOT run it yourself — the', - ' `afterShellExecution` hook would delete the manifest as an', - ' untracked protected-path write. The user runs it.', - '', - ' 8. On `show_json`: print the drafted manifest, then re-ask step 6.', - ' 9. On `edit_globs` / `widen` / `narrow`: ask one follow-up in chat,', - ' update the draft, then re-ask step 6.', - ' 10. On `cancel`: acknowledge, no task is set, continue unscoped.', - ' 11. On `custom_instruction`: ask in plain chat, then do what the user', - ' says.', + 'Phrasing: short and natural. No ALL-CAPS banners, no architecture', + 'explanations, no emoji unless the user uses them first.', '', 'Your onboarding turn starts now. Skip any other pending work until the', 'scope is approved or cancelled.', From 562e82d8f5f097d155088f7760155955af729869 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 13:08:17 +0200 Subject: [PATCH 11/21] agent-scope: make smart onboarding the only pnpm task start mode Drop the interactive CLI wizard and the --smart flag. `pnpm task start` now always captures a task description, drops the one-shot marker, and lets the agent propose a scope in chat. - bin/task.mjs: collapse start() + startSmart() + startInteractive() into a single start(); remove --smart/--chat/--interactive flags and all wizard-only helpers. - lib/wizard.mjs + wizard.test.mjs: removed (unused). - package.json: drop wizard.test.mjs from scope:test. - Rule files (.cursor/rules/agent-scope.mdc, CLAUDE.md, AGENTS.md, GEMINI.md, .cursorrules) and agent-scope/README.md: rewrite the onboarding section to describe one flow; rename "Smart onboarding protocol" to "Task onboarding protocol". All 194 unit tests pass. Made-with: Cursor --- .cursor/rules/agent-scope.mdc | 30 ++- .cursorrules | 16 +- AGENTS.md | 24 +-- CLAUDE.md | 34 ++- GEMINI.md | 8 +- agent-scope/README.md | 180 +++++++--------- agent-scope/bin/task.mjs | 315 +++------------------------- agent-scope/lib/onboarding.mjs | 29 +-- agent-scope/lib/onboarding.test.mjs | 2 +- agent-scope/lib/prompter.test.mjs | 2 +- agent-scope/lib/shell-parse.mjs | 2 +- agent-scope/lib/wizard.mjs | 297 -------------------------- agent-scope/lib/wizard.test.mjs | 262 ----------------------- package.json | 2 +- 14 files changed, 163 insertions(+), 1040 deletions(-) delete mode 100644 agent-scope/lib/wizard.mjs delete mode 100644 agent-scope/lib/wizard.test.mjs diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index 820398fd4..953b4b1d7 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -1,5 +1,5 @@ --- -description: Task-scoped write permissions. Invisible by default; engaged via `pnpm task start` (interactive wizard, preferred) or `pnpm task start --smart` (agent-guided with pre-captured description) or an active task. All denials surface a plan-mode AskQuestion menu. +description: Task-scoped write permissions. Invisible by default; engaged via `pnpm task start` (agent-guided with pre-captured description) or an active task. All denials surface a plan-mode AskQuestion menu. alwaysApply: true --- @@ -10,17 +10,11 @@ engages it. If no active task is set and bootstrap is off, the system is a no-op for you — behave normally. The hardcoded protected paths (the guard's own files) are still defended, but that only matters if you try to touch them. -The user turns the system on in one of three ways: - -1. **Interactive wizard (most common, no agent involvement)** — they run - `pnpm task start` in their terminal. A CLI wizard asks them a few - questions and writes + activates a manifest directly. No marker is - dropped and you see NOTHING in chat. By the time they message you, the - session-start hook has already injected the active-task context block — - treat it like case 3 below. -2. **Smart onboarding (`pnpm task start --smart`)** — they want YOU to read - their task description, explore the repo, and propose a scope. The CLI - captures a multi-line description from them and drops a one-shot marker +The user turns the system on in one of two ways: + +1. **`pnpm task start`** — they want YOU to read their task description, + explore the repo, and propose a scope. The CLI captures a task + description from them and drops a one-shot marker (`agent-scope/.pending-onboarding`, gitignored) that already embeds the description. THREE parallel consumers compete for the marker — whichever runs first wins, because consumption is atomic read-and-delete: @@ -34,7 +28,7 @@ The user turns the system on in one of three ways: Result: the next user message in any chat, new or existing, triggers onboarding exactly once. After that the marker is gone and everything else behaves normally. -3. **Existing active task** — the session-start hook injects a context block +2. **Existing active task** — the session-start hook injects a context block naming the active task. From that moment on, writes are task-scoped and every denial must be surfaced via the plan-mode denial protocol below. @@ -66,11 +60,11 @@ postToolUse hooks), treat it the same as finding the marker: pivot to the onboarding protocol immediately. The hooks already delete the marker for you in that case; do not double-delete — just follow the protocol. -## Smart onboarding protocol — when the user wants to start a new scoped task +## Task onboarding protocol — when the user wants to start a new scoped task Triggered by any of: -- The smart-mode trigger text (either found in the pending-onboarding +- The onboarding trigger text (either found in the pending-onboarding marker, or injected by the `sessionStart` / `postToolUse` hook). It begins with `agent-scope: start task onboarding.` and usually contains a `=== USER TASK DESCRIPTION ===` block. @@ -82,9 +76,9 @@ Follow these steps exactly: 1. **Get the task description.** - **If the trigger already contains a `=== USER TASK DESCRIPTION ===` block** - (the `--smart` flow): use that description verbatim as your brief. - DO NOT re-ask the user to describe the task — they already typed it - into the CLI. + (the `pnpm task start` flow): use that description verbatim as your + brief. DO NOT re-ask the user to describe the task — they already + typed it into the CLI. - **Otherwise** (bare marker or an intent phrase), send a plain chat message: > "OK, let's scope a new task. Describe in detail what we're building diff --git a/.cursorrules b/.cursorrules index 7b1ae5324..1d06a62e0 100644 --- a/.cursorrules +++ b/.cursorrules @@ -20,15 +20,13 @@ Quick summary: - `.claude/hooks/**`, `.claude/settings.json` - `agent-scope/**` (lib, bin, schema, tasks, active, .bootstrap-token) - `AGENTS.md`, `GEMINI.md`, `.cursorrules` -- The user's default onboarding is `pnpm task start` — an interactive CLI - wizard that writes the manifest itself. You see nothing; an active task - may just be set when you start talking. -- If the user runs `pnpm task start --smart`, a one-shot marker at - `agent-scope/.pending-onboarding` is dropped. The marker already - embeds the user's task description in a `=== USER TASK DESCRIPTION ===` - block — do NOT ask them to describe it again. On your first action of - any turn with no active task, check the marker — if present, delete it - and pivot to the smart-onboarding protocol in `AGENTS.md`. +- The user's onboarding is `pnpm task start` — they paste a description + in the CLI; a one-shot marker at `agent-scope/.pending-onboarding` is + dropped that already embeds the description in a + `=== USER TASK DESCRIPTION ===` block. Do NOT ask them to describe it + again. On your first action of any turn with no active task, check the + marker — if present, delete it and pivot to the onboarding protocol in + `AGENTS.md`. - Never invent menu options when surfacing a denial; reuse the JSON `options` array verbatim and add your reasoning + recommendation. - Hook-supporting agents (Cursor, Claude Code) physically block out-of-scope diff --git a/AGENTS.md b/AGENTS.md index e3c01b0fb..4f5ca15d2 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -19,13 +19,11 @@ through `.cursor/rules/agent-scope.mdc` and `CLAUDE.md`. The guard is **invisible by default**. It only activates when: -1. The user runs `pnpm task start --smart` and the trigger line +1. The user runs `pnpm task start` and the trigger line `agent-scope: start task onboarding.` reaches you (via a hook or via your own top-of-turn marker check). The marker already embeds the user's task description in a `=== USER TASK DESCRIPTION ===` block — - do NOT ask the user to describe it again. The default `pnpm task start` - without `--chat` is an interactive CLI wizard that writes a manifest - itself and never reaches you, so most users will skip this flow, OR + do NOT ask the user to describe it again, OR 2. An active task is set (`agent-scope/active` exists; the session-start hook will inject a context block naming it; or you can check by running `pnpm task show`), OR @@ -55,16 +53,13 @@ terminal: touch agent-scope/.bootstrap-token ``` -## Smart onboarding (when the user runs `pnpm task start --smart`) +## Task onboarding (when the user runs `pnpm task start`) -`pnpm task start --smart` captures a multi-line task description from the -user in the terminal, then drops a one-shot marker file at +`pnpm task start` captures a task description from the user in the +terminal, then drops a one-shot marker file at `agent-scope/.pending-onboarding` containing trigger text *and* the user's description embedded in a `=== USER TASK DESCRIPTION ===` block. -(The default `pnpm task start` without `--smart` is an interactive CLI -wizard that never involves you — by the time the user messages you, the -manifest is already written and activated.) The marker is consumed -atomically the first time anything reads it. +The marker is consumed atomically the first time anything reads it. For Codex CLI and other agents without hook support, you should **proactively check for this marker on the first action of every turn** when no task is @@ -80,8 +75,8 @@ active: 1. **Get the task description.** - If the marker contains a `=== USER TASK DESCRIPTION ===` block - (the `--smart` flow), use that verbatim as the brief. DO NOT ask - the user to describe the task again. + (the `pnpm task start` flow), use that verbatim as the brief. DO + NOT ask the user to describe the task again. - Otherwise, ask them in chat: "Describe the task in detail — packages, behaviours, tests, any files you already know about." Wait for reply. @@ -186,8 +181,7 @@ instead and follow their reply. Never invent options. ## CLI quick reference ``` -pnpm task start # interactive wizard (default, preferred) -pnpm task start --smart # user pastes description; agent proposes scope +pnpm task start # user pastes description; agent proposes scope in chat pnpm task list # list available task manifests pnpm task show # show the active task and its scope pnpm task set # set the active task diff --git a/CLAUDE.md b/CLAUDE.md index 8b9eb259d..fbb8f39f9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -190,30 +190,25 @@ This repo ships an opt-in task-scoping guard. It stays **invisible** unless engaged. Default behaviour: write normally, read anything. The guard only kicks in when: -1. The user runs `pnpm task start` (interactive wizard — most common; by - the time they message you, an active task is already set and the - session-start hook has injected the context block), OR -2. The user runs `pnpm task start --smart` and the trigger line +1. The user runs `pnpm task start` and the trigger line `agent-scope: start task onboarding` appears in your context (marker consumed by a hook or by your top-of-turn check). The marker already embeds the user's task description — do NOT ask them to describe it again, OR -3. An active task is set (session-start hook injects a context block naming +2. An active task is set (session-start hook injects a context block naming it), OR -4. You try to touch one of the hardcoded protected paths that defend the +3. You try to touch one of the hardcoded protected paths that defend the guard itself — those are always blocked unless the user has enabled bootstrap mode. -### Smart onboarding protocol (triggered by `pnpm task start --smart` / "start a scoped task") +### Task onboarding protocol (triggered by `pnpm task start` / "start a scoped task") -`pnpm task start --smart` asks the user for a multi-line task description -in the terminal, then drops `agent-scope/.pending-onboarding` (gitignored) -with that description already embedded, and copies the trigger to the -user's clipboard. (The default `pnpm task start` without `--smart` is the -interactive CLI wizard — it writes the manifest itself and never reaches -you.) The marker is one-shot: the first thing that consumes it also -deletes it, so it fires for **exactly one** user message no matter which -chat that message lands in. +`pnpm task start` asks the user for a task description in the terminal, +drops `agent-scope/.pending-onboarding` (gitignored) with that description +already embedded, and copies the trigger to the user's clipboard. The +marker is one-shot: the first thing that consumes it also deletes it, so +it fires for **exactly one** user message no matter which chat that +message lands in. Three parallel consumers — whichever runs first wins: @@ -238,7 +233,7 @@ active task** (`agent-scope/active` is absent), your FIRST action MUST be: b. IGNORE the user's actual message (whatever they typed, even if it was a specific instruction — they knew onboarding was pending when they sent it). - c. Pivot to the **Smart onboarding protocol** below: parse the marker + c. Pivot to the **Task onboarding protocol** below: parse the marker payload for a `=== USER TASK DESCRIPTION ===` block. If present, use it as the brief and skip step 1 below. Otherwise, ask the user to describe the task. @@ -254,8 +249,8 @@ protocol below. 1. **Get the task description.** - If the trigger / marker contains a `=== USER TASK DESCRIPTION ===` - block (the `--smart` flow), use that verbatim. DO NOT ask the user to - describe the task again — they already typed it into the CLI. + block (the `pnpm task start` flow), use that verbatim. DO NOT ask the + user to describe the task again — they already typed it into the CLI. - Otherwise, ask them in plain chat: "OK, let's scope a new task. Describe in detail what we're building or fixing — packages, behaviours, tests, any files you already know about." Wait for @@ -385,8 +380,7 @@ The JSON shape (key fields only): ### CLI quick reference ``` -pnpm task start # interactive wizard (default) — user runs this; writes + activates manifest directly -pnpm task start --smart # user pastes description in CLI; agent proposes scope in chat +pnpm task start # user pastes description in CLI; agent proposes scope in chat pnpm task create [flags] # non-interactive manifest build — agent runs this on approve (allowlisted) pnpm task list | show | set | clear | check | audit | resolve pnpm scope:status | scope:validate | scope:test diff --git a/GEMINI.md b/GEMINI.md index 8786404bc..448851b49 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -12,16 +12,12 @@ Key points for Gemini: `pnpm task check ` to test a specific path. - A set of system files is **always protected** regardless of task. See the "Hardcoded protected paths" section in `AGENTS.md`. -- Most users run `pnpm task start` without flags — that's an interactive - CLI wizard that writes a manifest directly. You don't see anything - special; by the time the user messages you, the active task is set. -- If the user runs `pnpm task start --smart`, a one-shot marker file at +- When the user runs `pnpm task start`, a one-shot marker file at `agent-scope/.pending-onboarding` is dropped. The marker already embeds the user's task description in a `=== USER TASK DESCRIPTION ===` block — do NOT ask them to describe it again. On your first action of any new turn (when no task is active), check whether that marker exists; - if it does, delete it and run the smart-onboarding protocol from - `AGENTS.md`. + if it does, delete it and run the onboarding protocol from `AGENTS.md`. - Gemini CLI does **not** have hard hook enforcement. You self-enforce by following the rules. The user trusts you to comply. - Never invent menu options when surfacing a denial — pass through the diff --git a/agent-scope/README.md b/agent-scope/README.md index 9beaf4eab..91175619c 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -22,27 +22,22 @@ session-start hook emits nothing, and the write/shell hooks only fire on the hardcoded protected paths (the guard's own files). You can do ad-hoc work without any task ceremony. -You engage the system in one of four ways: - -1. **Interactive wizard (default)** — run `pnpm task start`. A terminal - wizard asks you a few questions (description, which packages, extras), - drafts a manifest, shows a preview, and activates it. No agent round-trip - needed; works identically in every agent (Cursor, Claude Code, Codex, - Gemini, …) and even with no agent at all. -2. **Smart onboarding (`pnpm task start --smart`)** — agent-guided flow. - The CLI asks you for a multi-line task description, then drops a - one-shot marker that embeds that description and copies the trigger - text to your clipboard. The agent reads your description, explores - the repo semantically, and proposes a scope via a rich two-part - `AskQuestion` (multi-select packages + single-select action), and on - `approve` runs `pnpm task create` itself (the `afterShellExecution` - hook has a narrow allowlist for the canonical invocation — see - "Architecture / approved-task-create allowlist" below). Use this when - you want the agent to do the thinking. -3. **Explicit** — `pnpm task set ` activates a manifest you +You engage the system in one of three ways: + +1. **`pnpm task start`** — agent-guided onboarding. The CLI asks you for + a task description (single-Enter submit; multi-line pastes welcome), + drops a one-shot marker that embeds the description, and copies the + trigger text to your clipboard. The next message you send in any chat + (new or existing) makes the agent read your description, explore the + repo, and propose a scope via a short plan-mode `AskQuestion`. On + approval the agent runs `pnpm task create` itself (the + `afterShellExecution` hook has a narrow allowlist for the canonical + invocation — see "Architecture / approved-task-create allowlist" + below). +2. **Explicit** — `pnpm task set ` activates a manifest you already have. -4. **Direct** — `pnpm task create --description "..." --allowed "..." --activate` - builds + activates a manifest in one shot. +3. **Direct** — `pnpm task create --description "..." --allowed "..." --activate` + builds + activates a manifest in one shot (useful for CI / scripts). Clearing the active task (`pnpm task clear`) returns Cursor to its invisible default. @@ -68,9 +63,9 @@ or deleted afterwards. ### Approved-task-create allowlist The after-shell hooks include a narrow, audited allowlist so the agent -can finish the smart-onboarding flow itself — i.e. on plan-mode -`approve`, the agent runs `pnpm task create ...` and the hook lets -the resulting `agent-scope/tasks/.json` plus `agent-scope/active` +can finish the onboarding flow itself — i.e. on plan-mode `approve`, +the agent runs `pnpm task create ...` and the hook lets the +resulting `agent-scope/tasks/.json` plus `agent-scope/active` persist. The allowlist is: @@ -94,8 +89,8 @@ The allowlist is: so a syntactically-invalid manifest never reaches disk for the hook to allow. -This keeps the smart-onboarding UX one-step (agent runs the command -after you click Approve) without weakening protection: every non-matching +This keeps the onboarding UX one-step (agent runs the command after +you click Approve) without weakening protection: every non-matching write to `agent-scope/tasks/**` and `agent-scope/active` is still immediately reverted. @@ -196,12 +191,9 @@ pnpm scope:validate # validates every manifest ## Quick start ```bash -# Interactive wizard (default) — asks a few questions, drafts + activates a manifest +# Onboarding — paste a description in the CLI, the agent proposes a scope in chat pnpm task start -# Smart onboarding — paste a description in the CLI, agent proposes scope in chat -pnpm task start --smart - # Non-interactive manifest creation (flags) pnpm task create my-task \ --description "Refactor peer sync for workspace auth" \ @@ -238,86 +230,64 @@ pnpm task clear ## Onboarding flow -There are two onboarding flows. The **interactive wizard** is the default -— fully deterministic, no agent involvement. The **smart flow** -(`--smart`) is an AI-driven alternative when you want the agent to -understand your task description semantically and propose a scope. - -### Flow 1 — interactive wizard (default) - -Run `pnpm task start`. The CLI walks you through a short questionnaire: - -1. **Description** — one sentence describing the task. Used as - `description` in the manifest and as the seed for the task id. -2. **Task id** — auto-kebab-cased from the description; press Enter to - accept or type your own. -3. **Packages** — the wizard discovers workspace packages from - `pnpm-workspace.yaml` (or `package.json` `workspaces`, or a `packages/*` - fallback), presents them as a numbered list, and pre-selects the ones - whose names overlap with keywords in your description. Type the numbers - you want, or press Enter to accept the suggestion, or `none` to skip. -4. **Build-artefact exemptions** — y/n for the standard - `**/dist/**`, `**/*.tsbuildinfo`, `pnpm-lock.yaml` set. -5. **Extra allowed globs** (optional) and **extra deny globs** (optional) — - free-text, one per line, blank to finish. `!**/secrets.*` and - `!**/.env*` are always denied automatically. -6. **Preview** — prints the drafted manifest JSON. -7. **Save / edit / cancel** — `s` saves & activates, `e` opens `$EDITOR` - (or `$VISUAL`) on the file and re-validates on exit, `c` aborts without - writing anything. - -No chat round-trip, no agent needed, runs in under a second, works -identically in every agent. This is the recommended path. - -If `stdin` is not a TTY (CI, piped input), `pnpm task start` errors out -with guidance to use `pnpm task create --flags...` directly. Both -onboarding modes need interactive input. - -### Flow 2 — smart onboarding (`pnpm task start --smart`) - -The CLI prompts you for a task description (press Enter to submit — -multi-line pastes are captured in full via paste-detection), then drops -a one-shot marker at -`agent-scope/.pending-onboarding` (gitignored) that *already embeds* your -description inside a `=== USER TASK DESCRIPTION ===` block. The trigger -text is also copied to your clipboard. - -Your NEXT message in any chat (new or existing) makes the agent pivot to -smart onboarding. Three parallel consumers compete for the marker so it -fires exactly once: - -- **New chat (Cmd+L)** — the `sessionStart` hook injects the trigger. -- **Current chat, any message** — the agent's top-of-turn rule reads the - marker on its first action; the `postToolUse` hook injects it as - `additional_context` if the agent happens to call a tool first. -- **Manual paste** — the trigger is already in your clipboard. - -The agent then follows a fixed protocol (defined in -`.cursor/rules/agent-scope.mdc`, `CLAUDE.md`, `AGENTS.md`, `GEMINI.md`): - -1. Reads your description from the marker (does NOT ask you again). -2. Explores the codebase (Grep / Glob / SemanticSearch / DKG) to find - relevant files. Counts matching files per candidate package. -3. Proposes a scope via a **single short `AskQuestion`** — one question, - two options. The prompt is one-line rephrase of the task + the scope - as 3–5 bullet globs + "Sound good?" The options are: - - `go` — "Yes, go with that" - - `custom_instruction` — "Tell me what to change" -4. On `go`, the agent itself runs `pnpm task create ...` via the - shell tool. The `afterShellExecution` / PostToolUse-Bash hooks - recognise the canonical task-create invocation and allow its two - specific writes (`agent-scope/tasks/.json` and `agent-scope/active`) - to persist; every other write to those paths is still reverted. See - the "approved-task-create allowlist" section for details. -5. On `custom_instruction`, the agent asks in plain chat what you'd like - changed, updates the draft, and re-asks step 3. -6. Once approved, the agent starts the real work in the same turn. +`pnpm task start` is the single onboarding command. It's agent-guided: +the CLI captures your task description, drops a one-shot marker, and the +agent takes it from there. + +1. **You run `pnpm task start`** in the terminal. The CLI prompts: + + > What are you working on? + > (One or two sentences is plenty. Paste longer briefs if you have them.) + > Press Enter to send. + + Single Enter submits; multi-line pastes are captured in full via + paste-detection. + +2. **The CLI drops `agent-scope/.pending-onboarding`** (gitignored) — + a one-shot marker containing the onboarding protocol *and* your + description inside a `=== USER TASK DESCRIPTION ===` block. The + trigger text is also copied to your clipboard as a fallback. + +3. **Your next message in any chat triggers onboarding.** Three + parallel consumers compete for the marker so it fires exactly once: + + - **New chat (Cmd+L)** — the `sessionStart` hook injects the trigger. + - **Current chat, any message** — the agent's top-of-turn rule reads + the marker on its first action; the `postToolUse` hook injects it + as `additional_context` if the agent happens to call a tool first. + - **Manual paste** — the trigger is already in your clipboard. + +4. **The agent follows a fixed protocol** (defined in + `.cursor/rules/agent-scope.mdc`, `CLAUDE.md`, `AGENTS.md`, `GEMINI.md`): + + 1. Reads your description from the marker (does NOT ask you again). + 2. Explores the codebase (Grep / Glob / SemanticSearch / DKG) to find + relevant files. Counts matching files per candidate package. + 3. Proposes a scope via a **single short `AskQuestion`** — one + question, two options. The prompt is a one-line rephrase of the + task + the scope as 3–5 bullet globs + "Sound good?" The options + are: + - `go` — "Yes, go with that" + - `custom_instruction` — "Tell me what to change" + 4. On `go`, the agent itself runs `pnpm task create ...` via the + shell tool. The `afterShellExecution` / PostToolUse-Bash hooks + recognise the canonical task-create invocation and allow its two + specific writes (`agent-scope/tasks/.json` and + `agent-scope/active`) to persist; every other write to those paths + is still reverted. See the "approved-task-create allowlist" + section for details. + 5. On `custom_instruction`, the agent asks in plain chat what you'd + like changed, updates the draft, and re-asks step 3. + 6. Once approved, the agent starts the real work in the same turn. From here, every attempted write to an out-of-scope file triggers a plan-mode AskQuestion menu — see **Escalation** below. -The marker is one-shot: the first hook that consumes it also deletes it, -so the trigger fires exactly once per `pnpm task start --smart`. +The marker is one-shot: the first consumer that reads it also deletes +it, so onboarding fires exactly once per `pnpm task start`. + +If `stdin` is not a TTY (CI, piped input), `pnpm task start` errors out +with guidance to use `pnpm task create --flags...` directly. ## Manifest format diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index 5c1a585ea..b793ebd91 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -13,20 +13,12 @@ import { isBootstrapActive, } from '../lib/scope.mjs'; import { - ONBOARDING_TRIGGER_TEXT, buildOnboardingTrigger, writeOnboardingMarker, copyToClipboard, } from '../lib/onboarding.mjs'; import { detectAgents, statusGlyph, summary } from '../lib/check-agent.mjs'; -import { - discoverPackages, - deriveTaskId, - suggestPackagesFromDescription, - buildManifest, -} from '../lib/wizard.mjs'; import { createPrompter } from '../lib/prompter.mjs'; -import { spawnSync } from 'node:child_process'; try { checkNodeVersion(); } catch (e) { console.error(e.message); process.exit(3); } @@ -198,31 +190,24 @@ async function init(id) { } // --------------------------------------------------------------------------- -// Task onboarding +// Task onboarding — `pnpm task start` // --------------------------------------------------------------------------- // -// Two independent ways to start a task: -// -// (1) `pnpm task start` — default. Interactive CLI wizard that asks a few -// questions (description, packages, extras), drafts a manifest, -// previews it, and saves + activates it. No agent round-trip; works -// identically in every agent and with no agent at all. Deterministic -// keyword match. +// Single flow: the CLI asks "What are you working on?" (one short prompt, +// single Enter to submit, multi-line pastes captured in full), then drops +// a one-shot marker file at `agent-scope/.pending-onboarding` embedding +// the user's description plus the Task onboarding protocol. The next +// message the user sends in ANY chat (new or existing) makes the agent +// read the description, explore the repo, and propose a scope via a +// plan-mode AskQuestion. On approval the agent runs `pnpm task create` +// itself via the allowlist. // -// (2) `pnpm task start --smart` — agent-guided mode. The CLI prompts -// once for a multi-line task description, then drops a marker that -// embeds that description + the Smart onboarding protocol. The next -// message the user sends in any chat makes the agent read the -// description, explore the repo, and propose a scope via a rich -// AskQuestion (two questions: multi-select packages + action). The -// agent prints a `pnpm task create ...` command for the user to run. -// -// If stdin is not a TTY we refuse — both modes need interactive input. -// For CI / scripts use `pnpm task create --description ... --allowed -// ... --activate` directly. +// Requires an interactive terminal. For non-interactive / CI use call +// `pnpm task create --description ... --allowed ... --activate` +// directly — that path has always been scripted-friendly. // --------------------------------------------------------------------------- -async function start(argv = []) { +async function start() { const { id: activeId } = resolveActiveTaskId(root); if (activeId) { console.log(`A task is already active: ${activeId}`); @@ -232,19 +217,7 @@ async function start(argv = []) { return; } - // Accept both --smart (canonical) and --chat (old name we're migrating - // away from). If someone still has `--chat` in muscle memory, warn and - // continue — don't make the rename a paper cut. - const smartMode = argv.includes('--smart') || argv.includes('-s'); - const legacyChat = argv.includes('--chat') || argv.includes('-c'); - const forceInteractive = argv.includes('--interactive') || argv.includes('-i'); - const ttyOk = Boolean(process.stdin.isTTY) || forceInteractive; - - if (legacyChat) { - console.error('warning: --chat was renamed to --smart; proceeding as --smart.'); - } - - if (!ttyOk) { + if (!process.stdin.isTTY) { console.error('error: `pnpm task start` requires an interactive terminal.'); console.error(''); console.error('For non-interactive / CI use, call `pnpm task create` directly:'); @@ -253,16 +226,6 @@ async function start(argv = []) { process.exit(2); } - if (smartMode || legacyChat) return startSmart(); - await startInteractive(); -} - -async function startSmart() { - console.log('agent-scope: smart task scoping'); - console.log(' (the agent will read your description, explore the repo, and propose a scope)'); - console.log(' (tip: `pnpm task start` without --smart runs the deterministic wizard instead)'); - console.log(''); - const prompter = createPrompter(); let description = ''; try { @@ -277,7 +240,7 @@ async function startSmart() { const trimmed = description.trim(); if (!trimmed || trimmed.length < 10) { - bail('description is too short — smart mode needs at least a sentence of context'); + bail('description is too short — give me at least a sentence of context'); } const trigger = buildOnboardingTrigger({ description: trimmed }); @@ -285,253 +248,26 @@ async function startSmart() { const clip = copyToClipboard(trigger); console.log(''); - console.log(`agent-scope: captured ${trimmed.split(/\s+/).length} words.`); + console.log(`Got it — captured ${trimmed.split(/\s+/).length} words.`); console.log(''); - console.log('Next step — exchange ONE short message with your agent:'); - console.log(''); - console.log(' 1. Go to your Cursor / Claude Code / Codex / Gemini chat.'); - console.log(' Any chat works — new or existing.'); - console.log(' 2. Send any message ("go", "hi", whatever).'); - console.log(' 3. The agent reads your description, explores the repo, and'); - console.log(' proposes a scope via a plan-mode AskQuestion. One click to'); - console.log(' approve (or edit) the scope.'); - console.log(' 4. Paste the generated `pnpm task create` command back here.'); + console.log('Now open any chat and send any message ("go", "hi", whatever).'); + console.log('Your agent will read the description, explore the repo, and'); + console.log('ask you one quick question to approve the proposed scope.'); console.log(''); if (clip.ok) { - console.log(`(Trigger also copied to clipboard via ${clip.method} — pasting works too.)`); + console.log(`(Trigger also copied to clipboard via ${clip.method}.)`); } else { - console.log(`(Clipboard copy unavailable: ${clip.reason}. Paste is optional —`); - console.log(` any message will trigger onboarding because of the marker file.)`); + console.log(`(Clipboard copy unavailable: ${clip.reason}. No paste needed —`); + console.log(` any message triggers onboarding because of the marker file.)`); } console.log(''); console.log(`Marker file: ${markerPath}`); console.log('(Auto-deleted the moment the agent reads it; one-shot.)'); console.log(''); - console.log('Change your mind? `rm agent-scope/.pending-onboarding` and run'); - console.log('`pnpm task start` for the deterministic wizard instead.'); + console.log('Change your mind? `rm agent-scope/.pending-onboarding` and start over.'); bootstrapWarning(); } -async function startInteractive() { - console.log('agent-scope: interactive task wizard'); - console.log(' (no agent needed — hit Ctrl+C any time to cancel, nothing is saved until the final "save" step.)'); - console.log(' (tip: for agent-guided onboarding instead, run `pnpm task start --chat`)'); - console.log(''); - - const prompter = createPrompter(); - try { - // 1) Description --------------------------------------------------------- - const description = await askNonEmpty( - prompter, - 'What are you working on? (one short sentence)\n> ', - 'A description is required so the task manifest is self-explanatory.', - ); - console.log(''); - - // 2) Task ID ------------------------------------------------------------- - const existingIds = listTasks(root); - const suggestedId = deriveTaskId(description, { existingIds }); - const idInput = await prompter.ask( - `Task id (press Enter to accept "${suggestedId}"): `, - { default: suggestedId }, - ); - let taskId = idInput; - if (!/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(taskId)) { - console.log(` (invalid id "${taskId}" — falling back to "${suggestedId}")`); - taskId = suggestedId; - } - if (existingIds.includes(taskId)) { - const deduped = deriveTaskId(taskId + '-alt', { existingIds }); - console.log(` (id "${taskId}" already exists — using "${deduped}")`); - taskId = deduped; - } - const manifestPath = resolve(tasksDir, `${taskId}.json`); - console.log(''); - - // 3) Packages ------------------------------------------------------------ - const packages = discoverPackages(root); - let selectedPackages = []; - if (packages.length === 0) { - console.log('No workspace packages detected — skipping package picker.'); - console.log('(You can add allowed globs freely in the next step.)'); - } else { - const suggested = suggestPackagesFromDescription(description, packages); - const suggestedSet = new Set(suggested.map(p => p.path)); - const suggestedIndices = []; - printPackageList(packages, suggestedSet); - packages.forEach((p, i) => { - if (suggestedSet.has(p.path)) suggestedIndices.push(i + 1); - }); - const prompt = suggestedIndices.length - ? `Pick packages (space/comma separated; Enter = suggested [${suggestedIndices.join(' ')}]; type "none" for none): ` - : `Pick packages (space/comma separated; "none" or blank for none): `; - const picked = await prompter.askMultiNumber(prompt, packages.length, { - default: suggestedIndices, - }); - selectedPackages = picked.map(i => packages[i - 1]).filter(Boolean); - if (selectedPackages.length) { - console.log(` Selected: ${selectedPackages.map(p => p.name).join(', ')}`); - } else { - console.log(' No packages selected. You can still add custom allowed globs below.'); - } - } - console.log(''); - - // 4) Build artefacts ----------------------------------------------------- - const includeBuildArtifacts = await prompter.askYesNo( - 'Include build artefacts + lockfile as exemptions (**/dist/**, *.tsbuildinfo, pnpm-lock.yaml)?', - { default: true }, - ); - console.log(''); - - // 5) Extras -------------------------------------------------------------- - const extraAllowed = await prompter.askLines( - 'Additional ALLOWED globs (optional):', - { hint: 'one per line, blank to finish (e.g. scripts/dev.ts)' }, - ); - const extraDeny = await prompter.askLines( - 'Additional DENY globs (optional):', - { hint: 'one per line, blank to finish (! is added automatically). secrets and .env* are denied by default.' }, - ); - console.log(''); - - // 6) Build & preview ----------------------------------------------------- - const inheritBase = listTasks(root).includes('base'); - const manifest = buildManifest({ - id: taskId, - description, - selectedPackages, - includeBuildArtifacts, - extraAllowed, - extraDeny, - inheritBase, - existingIds, - }); - - const errs = validateManifest(manifest, taskId); - if (errs.length) { - console.error('Generated manifest failed validation:'); - for (const e of errs) console.error(` - ${e}`); - bail('could not build a valid manifest from your inputs — aborting without saving'); - } - - if (!manifest.allowed && !manifest.inherits) { - console.log('Heads up: no allowed globs and no inherits — agent will have nothing it can write.'); - const proceed = await prompter.askYesNo('Continue anyway?', { default: false }); - if (!proceed) { console.log('Aborted. Nothing was saved.'); return; } - } - - console.log('Proposed manifest:'); - console.log(` ${manifestPath}`); - console.log(''); - for (const line of JSON.stringify(manifest, null, 2).split('\n')) { - console.log(` ${line}`); - } - console.log(''); - - // 7) Save / edit / cancel ----------------------------------------------- - const decision = await prompter.askChoice('What next?', [ - { key: 's', label: 'save and activate (recommended)' }, - { key: 'e', label: 'edit manually (opens $EDITOR; saved & activated on close)' }, - { key: 'c', label: 'cancel — nothing will be written' }, - ], { default: 's' }); - - if (decision === 'c') { console.log('Aborted. Nothing was saved.'); return; } - - if (existsSync(manifestPath)) { - const overwrite = await prompter.askYesNo( - `Manifest already exists at ${manifestPath}. Overwrite?`, - { default: false }, - ); - if (!overwrite) { console.log('Aborted. Existing manifest untouched.'); return; } - } - - writeFileSync(manifestPath, JSON.stringify(manifest, null, 2) + '\n', 'utf8'); - console.log(`Created ${manifestPath}`); - - if (decision === 'e') { - const opened = openInEditor(manifestPath); - if (!opened.ok) { - console.log(`(editor launch failed: ${opened.reason} — manifest is saved as-is, edit it later with your editor of choice)`); - } else { - // Re-validate after editing; if invalid, leave it there and warn. - let edited; - try { edited = JSON.parse(readFileSync(manifestPath, 'utf8')); } - catch (e) { - console.error(`Saved file is no longer valid JSON: ${e.message}`); - console.error('Leaving it in place. Fix it by hand and run `pnpm task validate ' + taskId + '`.'); - return; - } - const editErrs = validateManifest(edited, taskId); - if (editErrs.length) { - console.error('Edited manifest has validation errors:'); - for (const e of editErrs) console.error(` - ${e}`); - console.error('Leaving it in place. Fix it and run `pnpm task validate ' + taskId + '`.'); - return; - } - } - } - - writeFileSync(activeFile, `${taskId}\n`, 'utf8'); - console.log(`Activated: ${taskId}`); - console.log(''); - console.log('The agent can now only write files matching the allowed globs.'); - console.log('Useful next commands:'); - console.log(' pnpm task show — see current scope'); - console.log(' pnpm task check — test a single path'); - console.log(' pnpm task clear — exit task mode'); - bootstrapWarning(); - } finally { - prompter.close(); - } -} - -async function askNonEmpty(prompter, prompt, explain) { - for (let attempt = 0; attempt < 3; attempt++) { - const v = await prompter.ask(prompt); - if (v && v.trim().length >= 3) return v.trim(); - console.log(` ${explain}`); - } - bail('no description provided after 3 tries — aborting'); -} - -function printPackageList(packages, suggestedSet) { - console.log('Workspace packages:'); - const width = Math.max(...packages.map(p => p.name.length), 4); - const cols = 2; - const rows = Math.ceil(packages.length / cols); - for (let r = 0; r < rows; r++) { - const line = []; - for (let c = 0; c < cols; c++) { - const i = c * rows + r; - if (i >= packages.length) continue; - const p = packages[i]; - const n = (i + 1).toString().padStart(2, ' '); - const marker = suggestedSet && suggestedSet.has(p.path) ? '*' : ' '; - line.push(` ${marker}${n}. ${p.name.padEnd(width, ' ')}`); - } - console.log(line.join(' ')); - } - if (suggestedSet && suggestedSet.size) console.log(' (* = suggested from your description)'); -} - -function openInEditor(filePath) { - const editor = process.env.VISUAL || process.env.EDITOR || 'vi'; - try { - const parts = editor.split(/\s+/).filter(Boolean); - const cmd = parts[0]; - const args = parts.slice(1).concat(filePath); - const r = spawnSync(cmd, args, { stdio: 'inherit' }); - if (r.error) return { ok: false, reason: r.error.message }; - if (typeof r.status === 'number' && r.status !== 0) { - return { ok: false, reason: `editor exited with status ${r.status}` }; - } - return { ok: true }; - } catch (e) { - return { ok: false, reason: e.message }; - } -} - function parseCreateArgs(argv) { const out = { id: null, @@ -731,7 +467,7 @@ try { case 'clear': clear(); break; case 'check': check(rest[0]); break; case 'init': await init(rest[0]); break; - case 'start': await start(rest); break; + case 'start': await start(); break; case 'create': create(rest); break; case 'validate': validate(rest[0]); break; case 'audit': audit(rest); break; @@ -742,8 +478,7 @@ try { console.log([ 'usage: task [args]', '', - ' start interactive wizard: draft a manifest + activate', - ' start --smart paste a description, agent proposes scope in chat', + ' start describe the task; the agent proposes a scope in chat', ' list list available task manifests', ' show show the active task and its scope', ' set set the active task', diff --git a/agent-scope/lib/onboarding.mjs b/agent-scope/lib/onboarding.mjs index 2e4033c91..e5182e9a1 100644 --- a/agent-scope/lib/onboarding.mjs +++ b/agent-scope/lib/onboarding.mjs @@ -1,17 +1,18 @@ -// Onboarding marker + clipboard helpers for `pnpm task start --smart`. +// Onboarding marker + clipboard helpers for `pnpm task start`. // -// The default `pnpm task start` is now a fully interactive CLI wizard that -// never involves the agent. This module powers the `--smart` mode, where -// the user pastes a task description in the CLI and the agent then reads -// that description, explores the repo, and proposes a scope. +// `pnpm task start` is the single onboarding flow: the CLI captures a +// task description from the user, then drops a one-shot marker file at +// `agent-scope/.pending-onboarding`. The next message the user sends in +// any chat makes the agent read the description, explore the repo, and +// propose a scope via a plan-mode AskQuestion. // // Flow: // -// 1. `pnpm task start --smart` reads a multi-line description from the -// user, then drops a one-shot marker file at -// `agent-scope/.pending-onboarding`. The marker contains both the -// trigger text AND the user's description, so the agent does not need -// to ask the user "describe the task" again. +// 1. `pnpm task start` reads a description from the user (single-Enter +// submit; multi-line pastes captured in full), then writes the +// marker. The marker contains both the trigger text AND the user's +// description, so the agent does not need to ask "describe the +// task" again. // 2. The user sends any message in any chat. // 3. THREE parallel consumers pick up the marker — whichever runs first // wins, because consume is atomic (read-and-delete): @@ -23,7 +24,7 @@ // top-of-turn marker check so even pure conversational messages // (e.g. "hi") consume the marker correctly. // -// 4. The agent follows the "Smart onboarding protocol" (CLAUDE.md, +// 4. The agent follows the "Task onboarding protocol" (CLAUDE.md, // .cursor/rules/agent-scope.mdc, AGENTS.md, GEMINI.md). // // Zero runtime deps. Pure-ish (spawnSync for clipboard; filesystem for marker). @@ -62,10 +63,10 @@ export function buildOnboardingTrigger({ description = '' } = {}) { 'agent-scope: start task onboarding.', '', hasDesc - ? 'The user ran `pnpm task start --smart` and has already provided their task description below. DO NOT ask them to describe it again — use the description as your brief.' - : 'The user ran `pnpm task start --smart` but did not include a description. Ask them to describe the task in one short chat message before proceeding.', + ? 'The user ran `pnpm task start` and has already provided their task description below. DO NOT ask them to describe it again — use the description as your brief.' + : 'The user ran `pnpm task start` but did not include a description. Ask them to describe the task in one short chat message before proceeding.', ...descBlock, - 'Smart onboarding protocol — follow EXACTLY (full text in CLAUDE.md,', + 'Task onboarding protocol — follow EXACTLY (full text in CLAUDE.md,', '.cursor/rules/agent-scope.mdc, AGENTS.md, GEMINI.md):', '', ' 1. Stop whatever you were about to do on this turn.', diff --git a/agent-scope/lib/onboarding.test.mjs b/agent-scope/lib/onboarding.test.mjs index 416963afc..a0c9c6aab 100644 --- a/agent-scope/lib/onboarding.test.mjs +++ b/agent-scope/lib/onboarding.test.mjs @@ -38,7 +38,7 @@ test('ONBOARDING_TRIGGER_TEXT starts with the canonical prefix and covers the pr ONBOARDING_TRIGGER_TEXT.startsWith('agent-scope: start task onboarding'), 'trigger must begin with the documented prefix', ); - assert.ok(ONBOARDING_TRIGGER_TEXT.includes('Smart onboarding protocol')); + assert.ok(ONBOARDING_TRIGGER_TEXT.includes('Task onboarding protocol')); assert.ok(ONBOARDING_TRIGGER_TEXT.includes('AskQuestion')); assert.ok(ONBOARDING_TRIGGER_TEXT.includes('pnpm task create')); }); diff --git a/agent-scope/lib/prompter.test.mjs b/agent-scope/lib/prompter.test.mjs index f54182513..fdd01cb5c 100644 --- a/agent-scope/lib/prompter.test.mjs +++ b/agent-scope/lib/prompter.test.mjs @@ -1,5 +1,5 @@ // Unit tests for the tiny prompter. Focuses on the paste-detection -// primitives used by `pnpm task start --smart`'s description reader — +// primitives used by `pnpm task start`'s description reader — // i.e. the `tryReadLine(timeoutMs)` helper and its interaction with // the blocking `ask()` path. diff --git a/agent-scope/lib/shell-parse.mjs b/agent-scope/lib/shell-parse.mjs index 612cddd4c..78f2e8f24 100644 --- a/agent-scope/lib/shell-parse.mjs +++ b/agent-scope/lib/shell-parse.mjs @@ -192,7 +192,7 @@ export function bodyTouchesProtected(body, protectedPatterns) { // paths (including `agent-scope/tasks/**`). That's the right default — we // don't want the agent to silently mint itself a wider task scope. // -// BUT: the smart-onboarding protocol ends with a plan-mode `AskQuestion` +// BUT: the onboarding protocol ends with a plan-mode `AskQuestion` // where the USER explicitly approves the proposed scope. Post-approval, // the agent should be able to run `pnpm task create ...` directly // without bouncing the command back to the user to copy-paste. diff --git a/agent-scope/lib/wizard.mjs b/agent-scope/lib/wizard.mjs deleted file mode 100644 index 5bfff4f5a..000000000 --- a/agent-scope/lib/wizard.mjs +++ /dev/null @@ -1,297 +0,0 @@ -// Pure helpers for the interactive `pnpm task start` wizard. -// -// Everything in this module is deterministic and side-effect-free once given -// a repo root, so every branch is unit-testable without a TTY. The actual -// interactive prompting lives in prompter.mjs; the CLI orchestration lives in -// bin/task.mjs. This file is the part you'd want to reuse if someone wanted -// to build (say) a VS Code command-palette version. - -import { - existsSync, readFileSync, readdirSync, statSync, -} from 'node:fs'; -import { resolve, join, relative, sep } from 'node:path'; - -// --------------------------------------------------------------------------- -// Package discovery -// --------------------------------------------------------------------------- -// -// Order of precedence: -// 1. pnpm-workspace.yaml (`packages:` list of globs/paths) -// 2. package.json `workspaces` (array or object.packages array) -// 3. fallback: scan `packages/*` -// -// We do a permissive line-based YAML parse so we don't pull in a dependency. -// The file format we care about is narrow and stable: -// -// packages: -// - "packages/*" -// - "demo" -// -// Anything fancier (nested keys, flow style) will just fall through to the -// workspaces / packages fallbacks. - -function parseWorkspaceYaml(text) { - const lines = text.split(/\r?\n/); - let inPkgs = false; - const out = []; - for (const raw of lines) { - const line = raw.replace(/#.*$/, ''); - if (/^packages\s*:\s*$/.test(line)) { inPkgs = true; continue; } - if (inPkgs && /^\S/.test(line)) break; - if (!inPkgs) continue; - const m = /^\s*-\s*["']?([^"'\s]+?)["']?\s*$/.exec(line); - if (m) out.push(m[1]); - } - return out; -} - -function parsePackageJsonWorkspaces(text) { - try { - const obj = JSON.parse(text); - const ws = obj && obj.workspaces; - if (Array.isArray(ws)) return ws.filter(s => typeof s === 'string'); - if (ws && Array.isArray(ws.packages)) return ws.packages.filter(s => typeof s === 'string'); - return []; - } catch { return []; } -} - -function expandGlobEntry(root, entry) { - // Only supports trailing `/*` (flat) and literal paths — enough for every - // real monorepo layout I've seen. If you need deeper expansion you should - // not be using the wizard anyway, just hand-author the manifest. - if (entry.endsWith('/*')) { - const base = entry.slice(0, -2); - const abs = resolve(root, base); - if (!existsSync(abs)) return []; - let names; - try { names = readdirSync(abs); } catch { return []; } - return names - .filter(n => !n.startsWith('.')) - .map(n => join(base, n)) - .filter(p => { - const full = resolve(root, p); - try { return statSync(full).isDirectory(); } catch { return false; } - }); - } - return [entry]; -} - -function readPackageName(root, pkgDir) { - const pj = resolve(root, pkgDir, 'package.json'); - if (!existsSync(pj)) return null; - try { - const obj = JSON.parse(readFileSync(pj, 'utf8')); - if (obj && typeof obj.name === 'string' && obj.name.trim()) return obj.name.trim(); - } catch { /* fall through */ } - return null; -} - -function shortName(pkgDir, fullName) { - if (fullName && fullName.includes('/')) { - const tail = fullName.split('/').pop(); - if (tail) return tail; - } - const parts = pkgDir.split('/'); - return parts[parts.length - 1] || pkgDir; -} - -export function discoverPackages(root) { - const entries = []; - - const wsYaml = resolve(root, 'pnpm-workspace.yaml'); - if (existsSync(wsYaml)) { - try { entries.push(...parseWorkspaceYaml(readFileSync(wsYaml, 'utf8'))); } - catch { /* ignore */ } - } - if (!entries.length) { - const pj = resolve(root, 'package.json'); - if (existsSync(pj)) { - try { entries.push(...parsePackageJsonWorkspaces(readFileSync(pj, 'utf8'))); } - catch { /* ignore */ } - } - } - if (!entries.length) entries.push('packages/*'); - - const dirs = new Set(); - for (const e of entries) { - for (const p of expandGlobEntry(root, e)) { - const pj = resolve(root, p, 'package.json'); - if (existsSync(pj)) dirs.add(p.split(sep).join('/')); - } - } - - const pkgs = []; - for (const pkgDir of [...dirs].sort()) { - const pjName = readPackageName(root, pkgDir); - const displayName = shortName(pkgDir, pjName); - pkgs.push({ - path: pkgDir, - name: displayName, - fullName: pjName || null, - }); - } - return pkgs; -} - -// --------------------------------------------------------------------------- -// Task-id derivation -// --------------------------------------------------------------------------- - -const ID_MAX = 48; - -export function deriveTaskId(description, { existingIds = [] } = {}) { - const base = (description || '') - .toLowerCase() - .replace(/[^a-z0-9]+/g, '-') - .replace(/^-+|-+$/g, '') - .slice(0, ID_MAX) - .replace(/^-+|-+$/g, ''); - - const fallback = () => { - const stamp = new Date().toISOString().replace(/[^0-9]/g, '').slice(0, 14); - return `task-${stamp}`; - }; - - let id = base; - if (!id || !/^[a-z0-9]/.test(id)) id = fallback(); - - if (!existingIds.includes(id)) return id; - for (let i = 2; i < 1000; i++) { - const suffix = `-${i}`; - const trimmed = id.slice(0, ID_MAX - suffix.length) + suffix; - if (!existingIds.includes(trimmed)) return trimmed; - } - return fallback(); -} - -// --------------------------------------------------------------------------- -// Keyword-based suggestion -// --------------------------------------------------------------------------- - -const STOP_WORDS = new Set([ - 'a','an','the','and','or','but','to','of','in','on','at','for','from','with', - 'by','as','is','are','was','were','be','been','being','do','does','did','can', - 'could','should','would','will','shall','may','might','must','this','that', - 'these','those','it','its','into','onto','over','under','about','through', - 'some','any','all','no','not','we','you','i','me','my','our','their','there', - 'here','up','down','out','if','then','than','so','very','just','also','too', - 'work','task','feature','feat','fix','bug','refactor','improve','add','remove', - 'rework','update','change','changes','new','old', -]); - -function tokenize(text) { - return (text || '') - .toLowerCase() - .split(/[^a-z0-9]+/) - .filter(t => t.length >= 2 && !STOP_WORDS.has(t)); -} - -function scorePackage(descTokens, descLower, pkg) { - const nameLower = pkg.name.toLowerCase(); - const pathLower = pkg.path.toLowerCase(); - const fullLower = (pkg.fullName || '').toLowerCase(); - - let score = 0; - if (descLower.includes(nameLower) && nameLower.length >= 3) score += 4; - if (descLower.includes(pathLower)) score += 3; - if (fullLower && descLower.includes(fullLower)) score += 3; - - const nameTokens = new Set([ - ...nameLower.split(/[-_/]+/).filter(Boolean), - ...fullLower.split(/[-_/@]+/).filter(Boolean), - ]); - - for (const t of descTokens) { - if (nameTokens.has(t)) score += 2; - else if (t.length >= 4 && (nameLower.includes(t) || pathLower.includes(t))) score += 1; - } - return score; -} - -export function suggestPackagesFromDescription(description, packages, { max } = {}) { - if (!Array.isArray(packages) || packages.length === 0) return []; - const descLower = (description || '').toLowerCase(); - const descTokens = tokenize(description); - if (descTokens.length === 0) return []; - - const scored = packages - .map(p => ({ pkg: p, score: scorePackage(descTokens, descLower, p) })) - .filter(s => s.score > 0) - .sort((a, b) => b.score - a.score); - - const cap = Math.max(1, Math.min(max ?? Math.ceil(packages.length / 2), scored.length)); - return scored.slice(0, cap).map(s => s.pkg); -} - -// --------------------------------------------------------------------------- -// Glob drafting + manifest composition -// --------------------------------------------------------------------------- - -const DEFAULT_DENY = ['!**/secrets.*', '!**/.env*']; -const DEFAULT_BUILD_EXEMPTIONS = ['**/dist/**', '**/*.tsbuildinfo', 'pnpm-lock.yaml']; - -export function draftGlobs(selectedPackages, opts = {}) { - const { - includeBuildArtifacts = true, - extraAllowed = [], - extraDeny = [], - } = opts; - - const allowed = []; - const seenAllowed = new Set(); - const push = (p) => { - if (typeof p !== 'string') return; - const trimmed = p.trim(); - if (!trimmed || seenAllowed.has(trimmed)) return; - seenAllowed.add(trimmed); - allowed.push(trimmed); - }; - - for (const pkg of selectedPackages || []) { - const path = typeof pkg === 'string' ? pkg : pkg.path; - if (!path) continue; - push(`${path.replace(/\/+$/, '')}/**`); - } - for (const p of extraAllowed) push(p); - for (const p of extraDeny) { - const withBang = p.startsWith('!') ? p : `!${p}`; - push(withBang); - } - for (const d of DEFAULT_DENY) push(d); - - const exemptions = includeBuildArtifacts ? [...DEFAULT_BUILD_EXEMPTIONS] : []; - - return { allowed, exemptions }; -} - -export function buildManifest({ - id, - description, - selectedPackages, - extraAllowed = [], - extraDeny = [], - includeBuildArtifacts = true, - inheritBase = true, - existingIds = [], - now = () => new Date().toISOString(), -}) { - const finalId = id && /^[a-z0-9][a-z0-9-_.]{0,63}$/.test(id) - ? id - : deriveTaskId(description, { existingIds }); - - const { allowed, exemptions } = draftGlobs(selectedPackages, { - includeBuildArtifacts, - extraAllowed, - extraDeny, - }); - - const manifest = { - id: finalId, - description: description ? description.trim() : undefined, - created: now(), - inherits: inheritBase ? ['base'] : undefined, - allowed: allowed.length ? allowed : undefined, - exemptions: exemptions.length ? exemptions : undefined, - }; - return Object.fromEntries(Object.entries(manifest).filter(([, v]) => v !== undefined)); -} diff --git a/agent-scope/lib/wizard.test.mjs b/agent-scope/lib/wizard.test.mjs deleted file mode 100644 index 945a1578b..000000000 --- a/agent-scope/lib/wizard.test.mjs +++ /dev/null @@ -1,262 +0,0 @@ -// Unit tests for the wizard pure-logic. -// node --test agent-scope/lib/wizard.test.mjs - -import { test } from 'node:test'; -import assert from 'node:assert/strict'; -import { - mkdtempSync, mkdirSync, writeFileSync, rmSync, -} from 'node:fs'; -import { tmpdir } from 'node:os'; -import { join } from 'node:path'; -import { - discoverPackages, - deriveTaskId, - suggestPackagesFromDescription, - draftGlobs, - buildManifest, -} from './wizard.mjs'; - -function makeRepo() { - const root = mkdtempSync(join(tmpdir(), 'agent-scope-wizard-')); - mkdirSync(join(root, 'agent-scope/tasks'), { recursive: true }); - return root; -} - -function writePkg(root, relPath, name) { - const full = join(root, relPath); - mkdirSync(full, { recursive: true }); - writeFileSync(join(full, 'package.json'), JSON.stringify({ name }, null, 2)); -} - -// --- discoverPackages ----------------------------------------------------- - -test('discoverPackages: pnpm-workspace.yaml with packages/*', () => { - const root = makeRepo(); - try { - writeFileSync(join(root, 'pnpm-workspace.yaml'), - 'packages:\n - "packages/*"\n - "demo"\n'); - writePkg(root, 'packages/agent', '@x/agent'); - writePkg(root, 'packages/core', '@x/core'); - writePkg(root, 'demo', '@x/demo'); - // A directory without package.json should be skipped. - mkdirSync(join(root, 'packages/no-pkg')); - - const pkgs = discoverPackages(root); - const names = pkgs.map(p => p.name).sort(); - assert.deepEqual(names, ['agent', 'core', 'demo']); - assert.ok(pkgs.every(p => typeof p.path === 'string' && p.path.length > 0)); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('discoverPackages: falls back to package.json workspaces', () => { - const root = makeRepo(); - try { - writeFileSync(join(root, 'package.json'), - JSON.stringify({ name: 'root', workspaces: ['libs/*'] })); - writePkg(root, 'libs/alpha', 'alpha'); - writePkg(root, 'libs/beta', 'beta'); - - const pkgs = discoverPackages(root); - assert.deepEqual(pkgs.map(p => p.name).sort(), ['alpha', 'beta']); - assert.ok(pkgs.every(p => p.path.startsWith('libs/'))); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('discoverPackages: falls back to packages/* scan when nothing declared', () => { - const root = makeRepo(); - try { - writePkg(root, 'packages/lone', 'lone'); - const pkgs = discoverPackages(root); - assert.deepEqual(pkgs.map(p => p.name), ['lone']); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('discoverPackages: empty repo → empty', () => { - const root = makeRepo(); - try { - assert.deepEqual(discoverPackages(root), []); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('discoverPackages: ignores dotfile subdirs when expanding globs', () => { - const root = makeRepo(); - try { - writeFileSync(join(root, 'pnpm-workspace.yaml'), 'packages:\n - "packages/*"\n'); - writePkg(root, 'packages/real', 'real'); - mkdirSync(join(root, 'packages/.hidden'), { recursive: true }); - const pkgs = discoverPackages(root); - assert.deepEqual(pkgs.map(p => p.name), ['real']); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -// --- deriveTaskId --------------------------------------------------------- - -test('deriveTaskId: kebab-cases a description', () => { - assert.equal(deriveTaskId('Refactor Peer Sync'), 'refactor-peer-sync'); -}); - -test('deriveTaskId: strips leading/trailing dashes', () => { - assert.equal(deriveTaskId(' -- hello world -- '), 'hello-world'); -}); - -test('deriveTaskId: truncates to 48 chars', () => { - const long = 'a'.repeat(100); - const id = deriveTaskId(long); - assert.ok(id.length <= 48, `got ${id.length}`); - assert.match(id, /^a+$/); -}); - -test('deriveTaskId: empty → task-', () => { - const id = deriveTaskId(''); - assert.match(id, /^task-\d{8,14}$/); -}); - -test('deriveTaskId: colon-only → task-', () => { - const id = deriveTaskId('!!!'); - assert.match(id, /^task-\d{8,14}$/); -}); - -test('deriveTaskId: collision → appends -2, -3', () => { - const existing = ['fix-auth', 'fix-auth-2']; - const id = deriveTaskId('fix auth', { existingIds: existing }); - assert.equal(id, 'fix-auth-3'); -}); - -// --- suggestPackagesFromDescription --------------------------------------- - -const SAMPLE_PKGS = [ - { path: 'packages/agent', name: 'agent' }, - { path: 'packages/core', name: 'core' }, - { path: 'packages/publisher', name: 'publisher' }, - { path: 'packages/storage', name: 'storage' }, - { path: 'packages/evm-module',name: 'evm-module' }, - { path: 'packages/cli', name: 'cli' }, -]; - -test('suggestPackages: exact name match wins', () => { - const s = suggestPackagesFromDescription('refactor peer sync in agent and core', SAMPLE_PKGS); - const names = s.map(p => p.name); - assert.ok(names.includes('agent'), names.join(',')); - assert.ok(names.includes('core'), names.join(',')); -}); - -test('suggestPackages: token inside compound name (evm)', () => { - const s = suggestPackagesFromDescription('improve evm deployment', SAMPLE_PKGS); - assert.ok(s.some(p => p.name === 'evm-module'), s.map(x => x.name).join(',')); -}); - -test('suggestPackages: empty description → empty', () => { - assert.deepEqual(suggestPackagesFromDescription('', SAMPLE_PKGS), []); -}); - -test('suggestPackages: no match → empty', () => { - const s = suggestPackagesFromDescription('write unrelated documentation for readme', SAMPLE_PKGS); - assert.equal(s.length, 0); -}); - -test('suggestPackages: ignores 1-char / stopword tokens', () => { - // 'a' 'to' 'the' would otherwise match 'agent', 'storage', 'publisher' - const s = suggestPackagesFromDescription('a to the', SAMPLE_PKGS); - assert.equal(s.length, 0); -}); - -test('suggestPackages: caps at ceil(n/2) by default', () => { - const s = suggestPackagesFromDescription( - 'agent core publisher storage evm cli', - SAMPLE_PKGS, - ); - assert.ok(s.length <= Math.ceil(SAMPLE_PKGS.length / 2), - `suggestions: ${s.map(p => p.name).join(',')}`); -}); - -// --- draftGlobs ----------------------------------------------------------- - -test('draftGlobs: one package → one allowed entry plus deny negations', () => { - const { allowed, exemptions } = draftGlobs( - [{ path: 'packages/agent', name: 'agent' }], - { includeBuildArtifacts: false }, - ); - assert.deepEqual(allowed, [ - 'packages/agent/**', - '!**/secrets.*', - '!**/.env*', - ]); - assert.deepEqual(exemptions, []); -}); - -test('draftGlobs: multiple packages + build exemptions', () => { - const { allowed, exemptions } = draftGlobs( - [{ path: 'packages/agent' }, { path: 'packages/core' }], - { includeBuildArtifacts: true }, - ); - assert.ok(allowed.includes('packages/agent/**')); - assert.ok(allowed.includes('packages/core/**')); - assert.ok(allowed.includes('!**/secrets.*')); - assert.deepEqual(exemptions, ['**/dist/**', '**/*.tsbuildinfo', 'pnpm-lock.yaml']); -}); - -test('draftGlobs: extraAllowed + extraDeny', () => { - const { allowed } = draftGlobs([], { - includeBuildArtifacts: false, - extraAllowed: ['scripts/my-tool.ts'], - extraDeny: ['config/**', '!already/!prefixed.ts'], - }); - assert.ok(allowed.includes('scripts/my-tool.ts')); - assert.ok(allowed.includes('!config/**')); - assert.ok(allowed.includes('!already/!prefixed.ts')); -}); - -test('draftGlobs: deduplicates identical entries', () => { - const { allowed } = draftGlobs( - [{ path: 'packages/agent' }, { path: 'packages/agent/' }], - { includeBuildArtifacts: false }, - ); - assert.equal(allowed.filter(a => a === 'packages/agent/**').length, 1); -}); - -test('draftGlobs: accepts raw path strings as well as {path} objects', () => { - const { allowed } = draftGlobs( - ['packages/mixed', { path: 'packages/object' }], - { includeBuildArtifacts: false }, - ); - assert.ok(allowed.includes('packages/mixed/**')); - assert.ok(allowed.includes('packages/object/**')); -}); - -// --- buildManifest -------------------------------------------------------- - -test('buildManifest: composes a valid-looking manifest', () => { - const m = buildManifest({ - id: 'my-task', - description: 'Refactor sync', - selectedPackages: [{ path: 'packages/agent' }], - includeBuildArtifacts: true, - inheritBase: true, - now: () => '2026-01-01T00:00:00.000Z', - }); - assert.equal(m.id, 'my-task'); - assert.equal(m.description, 'Refactor sync'); - assert.equal(m.created, '2026-01-01T00:00:00.000Z'); - assert.deepEqual(m.inherits, ['base']); - assert.ok(m.allowed.includes('packages/agent/**')); - assert.ok(m.exemptions.includes('**/dist/**')); -}); - -test('buildManifest: invalid id → derives from description', () => { - const m = buildManifest({ - id: '---bad---', - description: 'Fix staking flow', - selectedPackages: [{ path: 'packages/chain' }], - }); - assert.match(m.id, /^fix-staking-flow/); -}); - -test('buildManifest: no inheritBase → no inherits field', () => { - const m = buildManifest({ - id: 'isolated', - description: 'd', - selectedPackages: [{ path: 'packages/x' }], - inheritBase: false, - }); - assert.equal(m.inherits, undefined); -}); diff --git a/package.json b/package.json index 199c1db07..1f6e85b68 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ "test:game:e2e": "pnpm --filter @origintrail-official/dkg-app-origin-trail-game test:e2e", "test:all": "pnpm test && pnpm test:evm && pnpm test:game:ui", "task": "node agent-scope/bin/task.mjs", - "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs agent-scope/lib/check-agent.test.mjs agent-scope/lib/wizard.test.mjs agent-scope/lib/prompter.test.mjs", + "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs agent-scope/lib/check-agent.test.mjs agent-scope/lib/prompter.test.mjs", "scope:validate": "node agent-scope/bin/task.mjs validate", "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show", "scope:check-agent": "node agent-scope/bin/task.mjs check-agent" From e0d05bfd68f191f00fdebb7070805a5d07661973 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 13:09:26 +0200 Subject: [PATCH 12/21] agent-scope: shorter, more human CLI copy for `pnpm task start` Strip the explanatory paragraphs, parentheticals, and marker-file plumbing from the user-facing output. Keep only what someone actually needs to read: what to type, and what to do next. Made-with: Cursor --- agent-scope/bin/task.mjs | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index b793ebd91..0bfdaec19 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -210,28 +210,22 @@ async function init(id) { async function start() { const { id: activeId } = resolveActiveTaskId(root); if (activeId) { - console.log(`A task is already active: ${activeId}`); - console.log(`Run \`pnpm task clear\` first if you want to start a new one.`); - console.log(`Run \`pnpm task show\` to see its scope.`); + console.log(`You already have a task going: ${activeId}`); + console.log(`Run \`pnpm task clear\` to drop it, or \`pnpm task show\` to see it.`); bootstrapWarning(); return; } if (!process.stdin.isTTY) { - console.error('error: `pnpm task start` requires an interactive terminal.'); - console.error(''); - console.error('For non-interactive / CI use, call `pnpm task create` directly:'); - console.error(' pnpm task create --description "..." \\'); - console.error(' --allowed "packages/foo/**" --inherits base --activate'); + console.error('Run `pnpm task start` in a real terminal — it asks you a question.'); + console.error('For CI or scripts, use `pnpm task create` directly.'); process.exit(2); } const prompter = createPrompter(); let description = ''; try { - console.log('What are you working on?'); - console.log('(One or two sentences is plenty. Paste longer briefs if you have them.)'); - console.log('Press Enter to send.'); + console.log('What are you working on? One or two sentences is fine.'); console.log(''); description = await prompter.askPasteableDescription('> '); } finally { @@ -240,31 +234,21 @@ async function start() { const trimmed = description.trim(); if (!trimmed || trimmed.length < 10) { - bail('description is too short — give me at least a sentence of context'); + bail('Give me at least a sentence so I know what you\'re doing.'); } const trigger = buildOnboardingTrigger({ description: trimmed }); - const markerPath = writeOnboardingMarker(root, trigger); + writeOnboardingMarker(root, trigger); const clip = copyToClipboard(trigger); console.log(''); - console.log(`Got it — captured ${trimmed.split(/\s+/).length} words.`); - console.log(''); - console.log('Now open any chat and send any message ("go", "hi", whatever).'); - console.log('Your agent will read the description, explore the repo, and'); - console.log('ask you one quick question to approve the proposed scope.'); - console.log(''); + console.log('Got it. Open any chat and say hi.'); + console.log('I\'ll read what you wrote, look around the repo, and ask you'); + console.log('to OK a scope before I touch anything.'); if (clip.ok) { - console.log(`(Trigger also copied to clipboard via ${clip.method}.)`); - } else { - console.log(`(Clipboard copy unavailable: ${clip.reason}. No paste needed —`); - console.log(` any message triggers onboarding because of the marker file.)`); + console.log(''); + console.log('(Also copied to your clipboard, just in case.)'); } - console.log(''); - console.log(`Marker file: ${markerPath}`); - console.log('(Auto-deleted the moment the agent reads it; one-shot.)'); - console.log(''); - console.log('Change your mind? `rm agent-scope/.pending-onboarding` and start over.'); bootstrapWarning(); } From 6c5a7cc8460b1a609eaebb45c08a1434495cd583 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 13:30:17 +0200 Subject: [PATCH 13/21] agent-scope: tighten CLI copy and plan-mode prompt UX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `pnpm task start` prompt drops the "one or two sentences" hint; just asks "What are you working on?". - Post-capture message: "Send any message in chat (e.g. `start working`)" and "ask you to accept a scope" (was "OK a scope"). - `custom_instruction` option label rewritten from "Something else — tell me what" / "Tell me what to change" to "Type what you want instead" across denial.mjs, onboarding trigger, and rule files (cursor, claude, agents, readme) — makes it unambiguous that option B is the free-text entry. - Onboarding scope proposal now uses a numbered list (1), 2), 3) …) instead of bullet points. Examples + rules updated in sync. Made-with: Cursor --- .cursor/rules/agent-scope.mdc | 14 ++++++++------ AGENTS.md | 11 ++++++----- CLAUDE.md | 11 ++++++----- agent-scope/README.md | 14 ++++++-------- agent-scope/bin/task.mjs | 6 +++--- agent-scope/lib/denial.mjs | 2 +- agent-scope/lib/denial.test.mjs | 2 +- agent-scope/lib/onboarding.mjs | 6 +++--- 8 files changed, 34 insertions(+), 32 deletions(-) diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index 953b4b1d7..ccdda1f4f 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -115,17 +115,19 @@ Follow these steps exactly: - `allow_multiple`: `false` - `prompt`: two short sentences. First: a one-line rephrase of what you understood the task to be. Second: the scope you'd propose, as a short - bullet list of globs (3–5 lines max). Finish with "Sound good?" Example: + **numbered** list of globs (3–5 lines max). Finish with "Sound good?" + Example: > Refactor peer sync to use the new workspace auth. I'd scope it to: - > • `packages/agent/**` - > • `packages/core/**` + > 1) `packages/agent/**` + > 2) `packages/core/**` + > 3) inherit `base` (standard build-artefact exemptions) > > Sound good? - `options` (IDs must match exactly — only these two): - `go` — `"Yes, go with that"` - - `custom_instruction` — `"Tell me what to change"` + - `custom_instruction` — `"Type what you want instead"` 5. **On `go`:** **YOU (the agent) run the command directly** via the Shell tool. The `afterShellExecution` hook has a narrow allowlist for the @@ -198,7 +200,7 @@ The JSON shape (TypeScript for clarity): activeTask: string | null, simpleOptions: [ // exactly two entries — SURFACE THESE { id: string, label: string, action: { kind: "..." /* + fields */ } }, - { id: "custom_instruction", label: "Something else — tell me what", + { id: "custom_instruction", label: "Type what you want instead", action: { kind: "custom" } } ], recommendedOptionId: string, // matches simpleOptions[0].id @@ -248,7 +250,7 @@ The JSON shape (TypeScript for clarity): ### Phrasing rules (for onboarding AND denials) - Write like you're texting a coworker, not filing a ticket. -- One short question. One recommendation. One "something else" option. +- One short question. One recommendation. One "type what you want" option. - Never use ALL CAPS banners ("PROTECTED PATH", "STOP", "WARNING"). - Don't explain the internal architecture in the prompt — the user doesn't need to know about hooks or manifests to answer. Keep it action-focused: diff --git a/AGENTS.md b/AGENTS.md index 4f5ca15d2..e12424905 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -90,17 +90,18 @@ active: 4. Propose the scope to the user as **one short question with two options**. Write it like you're asking a coworker, not filling out a form. 3 sentences max: one-line rephrase of the task, the scope you'd - propose (3–5 bullet globs), then "Sound good?" Example: + propose (3–5 numbered globs), then "Sound good?" Example: > Refactor peer sync to use the new workspace auth. I'd scope it to: - > • `packages/agent/**` - > • `packages/core/**` + > 1) `packages/agent/**` + > 2) `packages/core/**` + > 3) inherit `base` (standard build-artefact exemptions) > > Sound good? Options (only these two, IDs exactly): - `go` — `"Yes, go with that"` - - `custom_instruction` — `"Tell me what to change"` + - `custom_instruction` — `"Type what you want instead"` 5. On `go`: **run the command yourself** via your shell tool, then continue with the actual work in the same turn. The command's @@ -162,7 +163,7 @@ verbatim** — never surface the verbose `options` list: > continue? - Options = `simpleOptions` verbatim (exactly two entries: the - recommendation and "Something else — tell me what"). + recommendation and "Type what you want instead"). Match the user's answer to the chosen `action.kind` and carry it out. If they pick `custom_instruction`, ask them in plain chat what they'd like diff --git a/CLAUDE.md b/CLAUDE.md index fbb8f39f9..3c870edc1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -265,17 +265,18 @@ protocol below. 4. **Propose the scope via a SINGLE `AskQuestion` — one question, two options.** Write it like you're asking a coworker, not filling out a form. 3 sentences max: one-line rephrase of the task, the scope you'd - propose (3–5 bullet globs), then "Sound good?" Example: + propose (3–5 numbered globs), then "Sound good?" Example: > Refactor peer sync to use the new workspace auth. I'd scope it to: - > • `packages/agent/**` - > • `packages/core/**` + > 1) `packages/agent/**` + > 2) `packages/core/**` + > 3) inherit `base` (standard build-artefact exemptions) > > Sound good? Options (IDs must match exactly — only these two): - `go` — `"Yes, go with that"` - - `custom_instruction` — `"Tell me what to change"` + - `custom_instruction` — `"Type what you want instead"` 5. **On `go`**, **YOU (the agent) run the command yourself** via the Shell tool. The `afterShellExecution` hook has a narrow allowlist for @@ -326,7 +327,7 @@ The JSON shape (key fields only): simpleOptions: [ // exactly two entries — SURFACE these { id, label, action }, // the recommended option { id: "custom_instruction", // free-text fallback (always present) - label: "Something else — tell me what", + label: "Type what you want instead", action: { kind: "custom" } } ], recommendedOptionId: string, // matches simpleOptions[0].id diff --git a/agent-scope/README.md b/agent-scope/README.md index 91175619c..35246158b 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -237,8 +237,6 @@ agent takes it from there. 1. **You run `pnpm task start`** in the terminal. The CLI prompts: > What are you working on? - > (One or two sentences is plenty. Paste longer briefs if you have them.) - > Press Enter to send. Single Enter submits; multi-line pastes are captured in full via paste-detection. @@ -265,10 +263,10 @@ agent takes it from there. relevant files. Counts matching files per candidate package. 3. Proposes a scope via a **single short `AskQuestion`** — one question, two options. The prompt is a one-line rephrase of the - task + the scope as 3–5 bullet globs + "Sound good?" The options - are: + task + the scope as a numbered list (`1)`, `2)`, `3)` …) + "Sound + good?" The options are: - `go` — "Yes, go with that" - - `custom_instruction` — "Tell me what to change" + - `custom_instruction` — "Type what you want instead" 4. On `go`, the agent itself runs `pnpm task create ...` via the shell tool. The `afterShellExecution` / PostToolUse-Bash hooks recognise the canonical task-create invocation and allow its two @@ -448,7 +446,7 @@ The structured block is fenced by HTML comments so it's trivial to locate: "alternativeTasks": [ { "id": "staking", "description": "..." } ], "simpleOptions": [ { "id": "add_glob", "label": "Add this folder to the task and try again", "action": { "kind": "add_to_manifest", ... } }, - { "id": "custom_instruction", "label": "Something else — tell me what", "action": { "kind": "custom" } } + { "id": "custom_instruction", "label": "Type what you want instead", "action": { "kind": "custom" } } ], "options": [ /* full verbose list — audit/back-compat only, NOT surfaced to users */ @@ -481,8 +479,8 @@ Extra guidance in the block: - `simpleOptions` always has **exactly two entries**: the LLM-recommended action (short human label like "Add this folder to the task and try again", "Skip it", "Yes, unlock it so I can do this edit") and a - free-text fallback `custom_instruction` → `"Something else — tell me - what"`. Agents surface these two options and **never** surface the + free-text fallback `custom_instruction` → `"Type what you want + instead"`. Agents surface these two options and **never** surface the verbose `options` list. - `options` is the verbose, audit-grade list (add_file, add_glob, switch tasks, skip, cancel, bootstrap, fix_manifest, clear_task, custom). It is diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index 0bfdaec19..bd3859a8c 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -225,7 +225,7 @@ async function start() { const prompter = createPrompter(); let description = ''; try { - console.log('What are you working on? One or two sentences is fine.'); + console.log('What are you working on?'); console.log(''); description = await prompter.askPasteableDescription('> '); } finally { @@ -242,9 +242,9 @@ async function start() { const clip = copyToClipboard(trigger); console.log(''); - console.log('Got it. Open any chat and say hi.'); + console.log('Got it. Send any message in chat (e.g. `start working`).'); console.log('I\'ll read what you wrote, look around the repo, and ask you'); - console.log('to OK a scope before I touch anything.'); + console.log('to accept a scope before I touch anything.'); if (clip.ok) { console.log(''); console.log('(Also copied to your clipboard, just in case.)'); diff --git a/agent-scope/lib/denial.mjs b/agent-scope/lib/denial.mjs index 30f6b69fa..789019fcf 100644 --- a/agent-scope/lib/denial.mjs +++ b/agent-scope/lib/denial.mjs @@ -85,7 +85,7 @@ const CUSTOM_OPTION = { // like a chat button, not a legal clause. const CUSTOM_OPTION_SIMPLE = { id: 'custom_instruction', - label: 'Something else — tell me what', + label: 'Type what you want instead', action: { kind: 'custom' }, }; diff --git a/agent-scope/lib/denial.test.mjs b/agent-scope/lib/denial.test.mjs index 84bf0045b..9d32f6597 100644 --- a/agent-scope/lib/denial.test.mjs +++ b/agent-scope/lib/denial.test.mjs @@ -606,7 +606,7 @@ test('simpleOptions: recommended labels are short and natural', () => { assert.equal(p2.simpleOptions[0].label, 'Skip it'); // custom label is the natural one too - assert.equal(p2.simpleOptions[1].label, 'Something else — tell me what'); + assert.equal(p2.simpleOptions[1].label, 'Type what you want instead'); } finally { cleanup(root); } }); diff --git a/agent-scope/lib/onboarding.mjs b/agent-scope/lib/onboarding.mjs index e5182e9a1..996282c58 100644 --- a/agent-scope/lib/onboarding.mjs +++ b/agent-scope/lib/onboarding.mjs @@ -85,10 +85,10 @@ export function buildOnboardingTrigger({ description = '' } = {}) { ' 6. Propose the scope via a SINGLE `AskQuestion` — ONE question, TWO', ' options. Write it like you are asking a coworker. Three', ' sentences max: one-line rephrase of the task, the scope you', - ' propose as 3–5 bullet globs, then "Sound good?" Options (ids', - ' exactly):', + ' propose as a 3–5 item NUMBERED list (1) 2) 3) ...), then', + ' "Sound good?" Options (ids exactly):', ' - go — "Yes, go with that"', - ' - custom_instruction — "Tell me what to change"', + ' - custom_instruction — "Type what you want instead"', '', ' 7. On `go`: YOU run `pnpm task create --description "..." \\', ' --allowed "..." --inherits base --activate` via the Shell tool.', From 0d393d3a7f28fd438da5851a6cf71117039147e8 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 13:56:26 +0200 Subject: [PATCH 14/21] agent-scope: fix two afterShell bugs (fd-redirect parse + manifest reaper) Two bugs were causing correct agent-scope flows to break: 1. `2>&1` and similar fd-duplication tokens were being parsed as writes to a file literally named `&1`, so read-only commands like `pnpm task show 2>&1`, `tsc --noEmit 2>&1`, or `wc -l foo 2>&1` got denied by beforeShellExecution. Fixed in shell-parse.extractRedirections by skipping any redirect target that starts with `&` (fd reference, not a path). Added unit tests for `2>&1`, `1>&2`, `>&1`, `1>&-`, `&>&1`, and the `&>/dev/null` positive case. 2. The afterShellExecution reaper was deleting the currently active task's manifest (`agent-scope/tasks/.json`) and the `agent-scope/active` pointer whenever any subsequent shell command ran, because those files are untracked and live in a protected path. The existing approved-write allowlist only covered the exact `pnpm task create` turn, not future turns. Added an active-task exemption in both the Cursor and Claude Code hooks so the active task's own state survives unrelated shell calls. Stale manifests for other ids are still reverted/deleted. Made-with: Cursor --- .claude/hooks/shell-diff-check.mjs | 14 +++++++++++++ .cursor/hooks/shell-diff-check.mjs | 14 +++++++++++++ agent-scope/lib/shell-parse.mjs | 15 +++++++++++--- agent-scope/lib/shell-parse.test.mjs | 31 ++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/.claude/hooks/shell-diff-check.mjs b/.claude/hooks/shell-diff-check.mjs index 3bf2e9a69..136c10359 100755 --- a/.claude/hooks/shell-diff-check.mjs +++ b/.claude/hooks/shell-diff-check.mjs @@ -95,12 +95,26 @@ async function main() { const approvedWrites = approvedTaskCreateWrites(approvedId); const approved = []; + // Active-task state exemption: the currently active task's manifest and + // the `active` pointer file are legitimate persistent state, not + // collateral from the current command. Without this, a manifest created + // by an earlier `pnpm task create` gets reaped the next time ANY + // unrelated shell command runs (because it shows up as untracked in a + // protected path). Only shield the active-task id — every other + // manifest (including stale ones) is still reverted/deleted. + const activeTaskExemptions = new Set(); + if (taskId) { + activeTaskExemptions.add(`agent-scope/tasks/${taskId}.json`); + activeTaskExemptions.add('agent-scope/active'); + } + const entries = parsePorcelain(porcelain); const outOfScope = entries.filter(({ path }) => { if (!path) return false; const d = checkPath(task, path, root); if (d !== 'deny' && d !== 'protected') return false; if (approvedWrites.has(path)) { approved.push(path); return false; } + if (activeTaskExemptions.has(path)) return false; return true; }); diff --git a/.cursor/hooks/shell-diff-check.mjs b/.cursor/hooks/shell-diff-check.mjs index e149605fc..3cfca3d5f 100755 --- a/.cursor/hooks/shell-diff-check.mjs +++ b/.cursor/hooks/shell-diff-check.mjs @@ -85,12 +85,26 @@ async function main() { const approvedWrites = approvedTaskCreateWrites(approvedId); const approved = []; + // Active-task state exemption: the currently active task's manifest and + // the `active` pointer file are legitimate persistent state, not + // collateral from the current command. Without this, a manifest created + // by an earlier `pnpm task create` gets reaped the next time ANY + // unrelated shell command runs (because it shows up as untracked in a + // protected path). Only shield the active-task id — every other + // manifest (including stale ones) is still reverted/deleted. + const activeTaskExemptions = new Set(); + if (taskId) { + activeTaskExemptions.add(`agent-scope/tasks/${taskId}.json`); + activeTaskExemptions.add('agent-scope/active'); + } + const entries = parsePorcelain(porcelain); const outOfScope = entries.filter(({ path }) => { if (!path) return false; const d = checkPath(task, path, root); if (d !== 'deny' && d !== 'protected') return false; if (approvedWrites.has(path)) { approved.push(path); return false; } + if (activeTaskExemptions.has(path)) return false; return true; }); diff --git a/agent-scope/lib/shell-parse.mjs b/agent-scope/lib/shell-parse.mjs index 78f2e8f24..bee5e9300 100644 --- a/agent-scope/lib/shell-parse.mjs +++ b/agent-scope/lib/shell-parse.mjs @@ -44,16 +44,25 @@ export function tokenize(cmd) { return out; } +// File-descriptor duplication (e.g. `2>&1`, `1>&2`, `>&1`, `1>&-`) is NOT a +// write to a file — the target starting with `&` references another fd, not +// a path. Without this guard, a harmless `cmd 2>&1` gets blocked because the +// parser thinks it redirects to a file called `&1`. +function isFdDupTarget(s) { + return typeof s === 'string' && s.startsWith('&'); +} + export function extractRedirections(tokens) { const targets = []; + const push = (v) => { if (v && !isFdDupTarget(v)) targets.push(v); }; for (let i = 0; i < tokens.length; i++) { const t = tokens[i]; if (t === '>' || t === '>>' || t === '&>' || t === '>|') { - if (tokens[i + 1]) targets.push(tokens[i + 1]); + push(tokens[i + 1]); } else if (/^[0-9]*>>?$/.test(t)) { - if (tokens[i + 1]) targets.push(tokens[i + 1]); + push(tokens[i + 1]); } else if (/^([0-9]*>>?|&>)[^\s]+/.test(t)) { - targets.push(t.replace(/^([0-9]*>>?|&>)/, '')); + push(t.replace(/^([0-9]*>>?|&>)/, '')); } else if (t === 'tee' || t === '/usr/bin/tee') { for (let j = i + 1; j < tokens.length; j++) { const a = tokens[j]; diff --git a/agent-scope/lib/shell-parse.test.mjs b/agent-scope/lib/shell-parse.test.mjs index 7f8f7aeaa..73f686f41 100644 --- a/agent-scope/lib/shell-parse.test.mjs +++ b/agent-scope/lib/shell-parse.test.mjs @@ -61,6 +61,37 @@ test('extractRedirections: no redirect', () => { assert.deepEqual(extractRedirections(tokenize('ls -la')), []); }); +test('extractRedirections: 2>&1 is fd dup, not a file write', () => { + assert.deepEqual(extractRedirections(tokenize('cmd arg 2>&1')), []); +}); + +test('extractRedirections: 1>&2 is fd dup, not a file write', () => { + assert.deepEqual(extractRedirections(tokenize('cmd 1>&2')), []); +}); + +test('extractRedirections: >&1 is fd dup, not a file write', () => { + assert.deepEqual(extractRedirections(tokenize('cmd >&1')), []); +}); + +test('extractRedirections: 1>&- close fd is not a file write', () => { + assert.deepEqual(extractRedirections(tokenize('cmd 1>&-')), []); +}); + +test('extractRedirections: &>&1 is fd dup, not a file write', () => { + assert.deepEqual(extractRedirections(tokenize('cmd &>&1')), []); +}); + +test('extractRedirections: pipe with 2>&1 and real file write', () => { + assert.deepEqual( + extractRedirections(tokenize('cmd 2>&1 > out.log')), + ['out.log'], + ); +}); + +test('extractRedirections: &>/dev/null is a write to /dev/null, not a fd dup', () => { + assert.deepEqual(extractRedirections(tokenize('cmd &>/dev/null')), ['/dev/null']); +}); + // --- destructive targets -------------------------------------------------- test('extractDestructiveTargets: rm -rf', () => { From 5b95fcf39f19fcc75f8f75fc5a9b6f0c849f2339 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 14:23:55 +0200 Subject: [PATCH 15/21] update readme --- agent-scope/README.md | 532 ++++-------------------------------------- 1 file changed, 42 insertions(+), 490 deletions(-) diff --git a/agent-scope/README.md b/agent-scope/README.md index 35246158b..6fa9ea9a5 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -1,522 +1,74 @@ # agent-scope -Task-scoped write permissions for AI coding agents. +Keeps AI coding agents from editing files they shouldn't. -An agent can **read** the whole repo, but can only **write** files that are -listed in the active task's manifest. Attempts to write out-of-scope files -are blocked by a stack of agent hooks (per-agent — Cursor, Claude Code) and -must be explicitly approved by a human (by editing the manifest). Agents -without a hook system (Codex CLI, Gemini CLI, etc.) get the same rules -delivered as instruction files and self-enforce. +The agent can read the whole repo, but can only **write** the files your +current task covers. If it tries to touch something else, you get a short +question first — accept or tell it what to do instead. You're never +restricted. This only watches the agent. -The guard restricts **agent** actions only. Humans committing, pushing, or -editing through their own terminal are never restricted — there are no git -hooks and no CI enforcement. If you edit a protected file by hand, you can -commit and push normally. - -## Opt-in by default - -agent-scope stays **invisible** until you engage it. With no active task and -no bootstrap, Cursor sessions behave as if the system didn't exist: the -session-start hook emits nothing, and the write/shell hooks only fire on the -hardcoded protected paths (the guard's own files). You can do ad-hoc work -without any task ceremony. - -You engage the system in one of three ways: - -1. **`pnpm task start`** — agent-guided onboarding. The CLI asks you for - a task description (single-Enter submit; multi-line pastes welcome), - drops a one-shot marker that embeds the description, and copies the - trigger text to your clipboard. The next message you send in any chat - (new or existing) makes the agent read your description, explore the - repo, and propose a scope via a short plan-mode `AskQuestion`. On - approval the agent runs `pnpm task create` itself (the - `afterShellExecution` hook has a narrow allowlist for the canonical - invocation — see "Architecture / approved-task-create allowlist" - below). -2. **Explicit** — `pnpm task set ` activates a manifest you - already have. -3. **Direct** — `pnpm task create --description "..." --allowed "..." --activate` - builds + activates a manifest in one shot (useful for CI / scripts). - -Clearing the active task (`pnpm task clear`) returns Cursor to its invisible -default. - -## Architecture (defense in depth) - -``` -Agent → Cursor sessionStart hook → injects active-task context + consumes any pending onboarding marker -Agent → Cursor preToolUse hook → blocks out-of-scope Write/Edit/Delete -Agent → Cursor beforeShellExecution → blocks destructive shell cmds on denied paths -Agent → Cursor afterShellExecution → reverts out-of-scope shell writes, deletes untracked files in denied paths -Agent → Cursor postToolUse hook → injects pending onboarding trigger in already-open chats (additive, never denies) -System → hardcoded protected paths → always blocks agent writes to agent-scope itself -Ops → optional webhook sink → forwards denials to DKG/Slack/etc. -``` - -All four agent-facing layers use the same library -(`agent-scope/lib/scope.mjs`) and the same manifests -(`agent-scope/tasks/*.json`). The pre-shell and after-shell layers back each -other up, so destructive commands that slip past the pre-check get reverted -or deleted afterwards. - -### Approved-task-create allowlist - -The after-shell hooks include a narrow, audited allowlist so the agent -can finish the onboarding flow itself — i.e. on plan-mode `approve`, -the agent runs `pnpm task create ...` and the hook lets the -resulting `agent-scope/tasks/.json` plus `agent-scope/active` -persist. - -The allowlist is: - -- **Deterministic** — `agent-scope/lib/shell-parse.mjs` (`extractTaskCreateId`) - tokenises the command and only matches canonical shapes: - `pnpm task create `, `pnpm run task create `, or - `node agent-scope/bin/task.mjs create `. Impostors like - `echo ... > agent-scope/tasks/evil.json`, `cp`, opaque evaluators - (`node -e`, `python -c`), `npm`/`yarn`/`bun` wrappers, or ids with - path-escape chars (`..`, `/`, `.`, spaces) return `null` and fall back - to the default revert/delete behaviour. -- **Narrow** — even with a valid id, only two paths are waived: - `agent-scope/tasks/.json` (that specific id) and `agent-scope/active`. - Other files written inside `agent-scope/tasks/**` in the same turn - (including other task manifests) are still reverted/deleted. -- **Audited** — every approved write is logged to - `agent-scope/logs/denials.jsonl` as an `afterShell.approved-create` - event alongside the command and task id. -- **Validated** — the CLI itself rejects invalid ids and schema errors, - so a syntactically-invalid manifest never reaches disk for the hook to - allow. - -This keeps the onboarding UX one-step (agent runs the command after -you click Approve) without weakening protection: every non-matching -write to `agent-scope/tasks/**` and `agent-scope/active` is still -immediately reverted. - -## Concepts - -| Concept | File | Description | -|---|---|---| -| **Task manifest** | `agent-scope/tasks/.json` | Declares what files a task is allowed to modify | -| **JSON schema** | `agent-scope/schema/task.schema.json` | Validates manifest structure | -| **Active task** | `agent-scope/active` | One-line file with the active task id (gitignored, per-developer) | -| **Env override** | `AGENT_SCOPE_TASK` | If set, takes precedence over the file | -| **Branch convention** | `task//*` or `agent-scope//*` | Branch name auto-detects the task if the env/file is unset | -| **Git config fallback** | `git config agent-scope.task ` | Lowest-priority fallback | -| **Denial log** | `agent-scope/logs/denials.jsonl` | Append-only audit trail (gitignored, rotates at 5MB) | -| **Bootstrap token** | `agent-scope/.bootstrap-token` | If present, disables hardcoded path protection (for maintainers editing agent-scope itself) | -| **Bootstrap env** | `AGENT_SCOPE_BOOTSTRAP=1` | Same as above but per-process | -| **Webhook sink** | `AGENT_SCOPE_WEBHOOK=` | POSTs each denial to the URL (opt-in) | - -## Supported agents - -| Agent | Enforcement | Wired via | -|---|---|---| -| Cursor | **hard hooks** — physical block | `.cursor/hooks/`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` | -| Claude Code | **hard hooks** — physical block | `.claude/hooks/`, `.claude/settings.json`, `CLAUDE.md` | -| Codex CLI (OpenAI) | soft — agent self-enforces | `AGENTS.md` | -| Gemini CLI | soft — agent self-enforces | `GEMINI.md` | -| Continue / Cline / older Cursor | soft (varies) | `.cursorrules` | - -**Hard enforcement** means the hook process physically rejects out-of-scope -writes before they hit disk, regardless of what the agent decides to do. -**Soft enforcement** means the agent reads the rule files at session start -and is expected to comply — this is the best we can do for agents that -don't expose a hook API yet. - -The same task manifests, the same CLI (`pnpm task ...`), the same denial -menu structure apply across all agents — only the enforcement layer -differs. - -## One-time setup - -There is no setup. Each agent loads its own config files (`.cursor/...`, -`.claude/...`, `AGENTS.md`, etc.) automatically when you open the repo. - -After pulling the repo, run this once to verify your agent is wired up: - -```bash -pnpm scope:check-agent # or: pnpm task check-agent -``` - -It prints a per-agent green/yellow/red status and tells you exactly what -(if anything) you need to do. Sample output: - -``` -Cursor [✓ active] - enforcement: hard hooks - ✓ .cursor/hooks.json present - ✓ .cursor/hooks/scope-guard.mjs executable - ... - -Claude Code [✓ active] - enforcement: hard hooks - ✓ .claude/settings.json present - ✓ .claude/hooks/scope-guard.mjs executable - ... - setup: - First-run note: Claude Code will prompt you to TRUST the project hooks - the first time you open this repo. Approve them — that's how - enforcement attaches. - -Codex CLI [~ soft] - enforcement: soft (no hook system available) - ✓ AGENTS.md present (Codex CLI reads this on every session) - ! Hard blocks DO NOT apply here — Codex self-enforces. -``` - -Other sanity checks: +## Start a task ```bash -pnpm scope:test # runs the scope library unit tests -pnpm scope:validate # validates every manifest +pnpm task start ``` -### Per-agent setup notes +Type what you're working on, hit Enter. Then send any message in the chat +(`start working`, `hi`, whatever). The agent reads your description, looks +around the repo, and proposes which folders to include. Accept it and the +agent starts working inside that scope. -- **Cursor**: hooks load automatically from `.cursor/hooks.json` next time - you open the repo. No prompt, no action needed. -- **Claude Code**: the first time you open this repo, Claude Code will - prompt you to **trust the project hooks**. You must approve — that's how - the enforcement attaches. After that it's automatic. -- **Codex CLI**: reads `AGENTS.md` automatically. No installation step. - Caveat — Codex CLI has no hook API today, so blocking out-of-scope - writes depends on the agent obeying the rules. -- **Gemini CLI**: reads `GEMINI.md` automatically. Same self-enforcement - caveat as Codex. -- **Other agents** (Continue, Cline, Roo, older Cursor): pick up - `.cursorrules`. Coverage varies — treat as best-effort. - -## Quick start +When you're done: ```bash -# Onboarding — paste a description in the CLI, the agent proposes a scope in chat -pnpm task start - -# Non-interactive manifest creation (flags) -pnpm task create my-task \ - --description "Refactor peer sync for workspace auth" \ - --allowed "packages/agent/src/**sync*" \ - --allowed "packages/agent/test/**sync*" \ - --inherits base \ - --activate - -# List available tasks (marks the active one with *) -pnpm task list - -# Set the active task -pnpm task set sync-refactor - -# See which task is active and what it allows -pnpm task show - -# Debug how the active task was resolved (env/file/branch/git-config) -pnpm task resolve - -# Create a new task manifest interactively (prompts you) -pnpm task init my-task - -# Check whether a specific path is in scope -pnpm task check packages/agent/src/sync-handler.ts -# → allow - -# Recent denials (audit) -pnpm task audit - -# Clear the active task (writes unrestricted again) pnpm task clear ``` -## Onboarding flow - -`pnpm task start` is the single onboarding command. It's agent-guided: -the CLI captures your task description, drops a one-shot marker, and the -agent takes it from there. +## When the agent wants to go out of scope -1. **You run `pnpm task start`** in the terminal. The CLI prompts: +You'll see something like this in the chat: - > What are you working on? +> I'd like to edit `packages/foo/bar.ts`, but the active task doesn't cover +> it. Add that folder and keep going? +> +> A) Yes, add it and continue +> B) Type what you want instead - Single Enter submits; multi-line pastes are captured in full via - paste-detection. +Pick A, or just type what you'd rather have. Nothing out of scope gets +written without your OK. -2. **The CLI drops `agent-scope/.pending-onboarding`** (gitignored) — - a one-shot marker containing the onboarding protocol *and* your - description inside a `=== USER TASK DESCRIPTION ===` block. The - trigger text is also copied to your clipboard as a fallback. - -3. **Your next message in any chat triggers onboarding.** Three - parallel consumers compete for the marker so it fires exactly once: - - - **New chat (Cmd+L)** — the `sessionStart` hook injects the trigger. - - **Current chat, any message** — the agent's top-of-turn rule reads - the marker on its first action; the `postToolUse` hook injects it - as `additional_context` if the agent happens to call a tool first. - - **Manual paste** — the trigger is already in your clipboard. - -4. **The agent follows a fixed protocol** (defined in - `.cursor/rules/agent-scope.mdc`, `CLAUDE.md`, `AGENTS.md`, `GEMINI.md`): - - 1. Reads your description from the marker (does NOT ask you again). - 2. Explores the codebase (Grep / Glob / SemanticSearch / DKG) to find - relevant files. Counts matching files per candidate package. - 3. Proposes a scope via a **single short `AskQuestion`** — one - question, two options. The prompt is a one-line rephrase of the - task + the scope as a numbered list (`1)`, `2)`, `3)` …) + "Sound - good?" The options are: - - `go` — "Yes, go with that" - - `custom_instruction` — "Type what you want instead" - 4. On `go`, the agent itself runs `pnpm task create ...` via the - shell tool. The `afterShellExecution` / PostToolUse-Bash hooks - recognise the canonical task-create invocation and allow its two - specific writes (`agent-scope/tasks/.json` and - `agent-scope/active`) to persist; every other write to those paths - is still reverted. See the "approved-task-create allowlist" - section for details. - 5. On `custom_instruction`, the agent asks in plain chat what you'd - like changed, updates the draft, and re-asks step 3. - 6. Once approved, the agent starts the real work in the same turn. - -From here, every attempted write to an out-of-scope file triggers a -plan-mode AskQuestion menu — see **Escalation** below. - -The marker is one-shot: the first consumer that reads it also deletes -it, so onboarding fires exactly once per `pnpm task start`. - -If `stdin` is not a TTY (CI, piped input), `pnpm task start` errors out -with guidance to use `pnpm task create --flags...` directly. - -## Manifest format - -```json -{ - "id": "sync-refactor", - "description": "Refactor peer sync protocol to add workspace sync auth", - "owner": "bojan", - "allowed": [ - "packages/agent/src/**sync*", - "packages/agent/src/discovery.ts", - "packages/core/src/**sync*", - "packages/publisher/src/**sync*", - "packages/*/test/**sync*", - "!**/secrets.*" - ], - "exemptions": [ - "**/dist/**", - "**/*.tsbuildinfo", - "pnpm-lock.yaml" - ] -} -``` - -- `allowed` — glob patterns that the agent may write to. Supports `*`, `**`, `?`. -- `exemptions` — patterns that are always allowed (build artifacts, lockfiles). -- `!pattern` — explicit deny, overrides everything else in both lists. -- **Default-deny**: anything not matched is blocked. - -Run `pnpm scope:validate` to verify all manifests conform to -`agent-scope/schema/task.schema.json`. - -## How enforcement works - -Four agent-facing layers, all running inside Cursor: - -1. **`sessionStart` hook** (`.cursor/hooks/session-start.mjs`) injects the - active task's allowed patterns into the agent's context so it knows what - it may modify from the first turn. **When no task is active and bootstrap - is off, the hook emits nothing** — the agent's initial context is - untouched. Only when a task is active (or bootstrap is on) does it surface - a context block. -2. **`preToolUse` hook** (`.cursor/hooks/scope-guard.mjs`) runs before every - `Write`, `StrReplace`, `Delete`, `EditNotebook`, `MultiEdit`, and `Edit`. - It runs the protected-path check first, then the task-scope check. -3. **`beforeShellExecution` hook** (`.cursor/hooks/shell-precheck.mjs`) - tokenises the pending shell command and blocks destructive verbs - (`rm`, `mv`, `cp`, `chmod`, `chown`, `truncate`, `ln -sf`, `sed -i`, - redirections `>` / `>>` / `tee`, `find -delete`, `xargs rm`) when their - target is out-of-scope or hardcode-protected. Recurses into `bash -c`, - `sh -c`, and opaque evaluators (`node -e`, `python -c`, `perl -e`) to - catch bypass attempts that hide destructive operations inside string - arguments. Parsing logic lives in `agent-scope/lib/shell-parse.mjs` and - is fully unit-tested. -4. **`afterShellExecution` hook** (`.cursor/hooks/shell-diff-check.mjs`) is - the backstop for anything the pre-check misses: it runs - `git status --porcelain`, `git checkout --` reverts any tracked - out-of-scope/protected modifications, and **deletes** untracked files in - denied paths (so an agent cannot establish persistent state like a new - hook file via a pre-shell bypass). -5. **`postToolUse` hook** (`.cursor/hooks/post-tool-use.mjs`) exists only to - consume a pending onboarding marker (written by `pnpm task start`) in an - already-open chat. It never denies anything — it just injects the - onboarding trigger as `additional_context` after the next tool call, so - the agent pivots to the Task onboarding protocol on its next turn. - -If no active task is set (no env, no file, no matching branch, no git-config) -**and** bootstrap is off, layer 1 is silent and layers 2–4 only trigger on -the hardcoded protected paths. Everything else is a no-op — you can do -ad-hoc work without changing the workflow. Layer 5 only emits anything when -`agent-scope/.pending-onboarding` is present. - -No layer restricts **humans**. You can `git commit`, `git push`, and edit -anything manually through your terminal or IDE without interacting with the -guard — it only sees what the agent does. - -## Hardcoded protected paths - -Some files define the enforcement system itself. If the agent were free to -edit them, the whole thing would be worthless. These paths are **always -denied** regardless of active task, unless bootstrap mode is active: - -- `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` -- `.claude/hooks/**`, `.claude/settings.json` -- `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/schema/**` -- `agent-scope/tasks/**`, `agent-scope/active`, - `agent-scope/.bootstrap-token` -- `AGENTS.md`, `GEMINI.md`, `.cursorrules` - -(This list applies to **agent** writes only. A human editing any of these -files through their own terminal/IDE is not restricted.) +## Supported agents -### Bootstrap mode +- **Cursor** and **Claude Code** — hard-blocked at the hook level, the + agent physically can't write out-of-scope files. +- **Codex CLI** and **Gemini CLI** — no hook API yet, so they read + `AGENTS.md` / `GEMINI.md` on session start and are expected to follow the + rules. Best-effort. -To legitimately improve `agent-scope` itself, a human enables bootstrap mode. -Two equivalent switches: +After you clone the repo, run this once to check your agent is wired up: ```bash -# Option A — file token (persists across sessions until deleted) -touch agent-scope/.bootstrap-token - -# Option B — env var (just for the current Cursor process) -export AGENT_SCOPE_BOOTSTRAP=1 +pnpm scope:check-agent ``` -While bootstrap is active, the sessionStart hook prints a loud warning into -the agent context. When you're done, remove it: +## Commands ```bash -rm agent-scope/.bootstrap-token -``` - -The bootstrap token is in `.gitignore`, so it cannot accidentally leak into -a commit even if you `git add .`. If you ever do `git add -f` it, remove it -before pushing. - -## Manifest inheritance - -Manifests can share common exemptions (e.g. `**/dist/**`, `pnpm-lock.yaml`) -via an `inherits` field. The `base` task ships as a pure-exemption parent: - -```json -{ "id": "child", "inherits": ["base"], "allowed": ["src/**"] } -``` - -Inheritance merges parents first (deduplicating), then the child's own -`allowed`/`exemptions` are appended. `!pattern` denials in a child override -parent `allowed` patterns. Cycles are detected and rejected. - -## Optional webhook sink - -Forward denials to a DKG node / Slack / log aggregator by setting -`AGENT_SCOPE_WEBHOOK` to an http(s) URL. Each denial is POSTed as JSON -(fire-and-forget, 1.5s timeout). Activity is also written to -`agent-scope/logs/denials.jsonl` locally with automatic rotation at 5MB. - -## Escalation — plan-mode denial menu - -Every denial (preToolUse, beforeShellExecution, afterShellExecution) emits both -a short human summary **and** a machine-readable JSON menu embedded in the -hook's response. Agents following `.cursor/rules/agent-scope.mdc`, -`CLAUDE.md`, or `AGENTS.md` must parse the menu and surface it via their -client's plan-mode-style question mechanism (`AskQuestion` in Cursor) — -**one question, two options**. - -The structured block is fenced by HTML comments so it's trivial to locate: - -``` - -{ - "version": 1, - "hook": "preToolUse", - "reason": "out-of-scope", - "humanSummary": "I'd like to edit `packages/evm-module/contracts/Staking.sol`, but the active task `sync-refactor` doesn't cover that file.", - "deniedPath": "packages/evm-module/contracts/Staking.sol", - "activeTask": "sync-refactor", - "suggestedGlob": "packages/evm-module/contracts/**", - "alternativeTasks": [ { "id": "staking", "description": "..." } ], - "simpleOptions": [ - { "id": "add_glob", "label": "Add this folder to the task and try again", "action": { "kind": "add_to_manifest", ... } }, - { "id": "custom_instruction", "label": "Type what you want instead", "action": { "kind": "custom" } } - ], - "options": [ - /* full verbose list — audit/back-compat only, NOT surfaced to users */ - ], - "recommendedOptionId": "add_glob", - "agentReasoning": null -} - -``` - -Possible `action.kind` values: - -| kind | what the agent should do | -|---|---| -| `add_to_manifest` | Add `action.patterns` to `agent-scope/tasks/.json` under `allowed`, then retry the original operation. | -| `switch_task` | Run `pnpm task set `, then retry. | -| `bootstrap` | Print `action.instruction` to the user and wait for confirmation. Remind them to remove the token after. | -| `fix_manifest` | Open `agent-scope/tasks/.json`, fix the error (`action.error`), re-run `pnpm task validate`. | -| `clear_task` | Run `pnpm task clear`. | -| `skip` | Acknowledge and move on. | -| `cancel` | Stop the turn; summarise for the user. | -| `custom` | Ask the user in plain chat what they want instead, then do it. | - -Extra guidance in the block: - -- `humanSummary` is the one-line natural-language framing of the situation. - The agent is told to **quote this verbatim** in the AskQuestion prompt and - add one short sentence of their own reasoning (why they wanted to do it). - Keep the whole prompt to 3 sentences max. -- `simpleOptions` always has **exactly two entries**: the LLM-recommended - action (short human label like "Add this folder to the task and try - again", "Skip it", "Yes, unlock it so I can do this edit") and a - free-text fallback `custom_instruction` → `"Type what you want - instead"`. Agents surface these two options and **never** surface the - verbose `options` list. -- `options` is the verbose, audit-grade list (add_file, add_glob, switch - tasks, skip, cancel, bootstrap, fix_manifest, clear_task, custom). It is - preserved for back-compat, tests, and anyone inspecting the JSON - directly — but not intended for end-user display. -- `recommendedOptionId` is the id of `simpleOptions[0]`. Chosen - conservatively: `add_glob` for out-of-scope, `cancel` for protected, - `fix_manifest` for manifest-load errors. -- `agentReasoning: null` is a placeholder the agent overwrites when - quoting it in their prompt. - -Heuristics (in `agent-scope/lib/denial.mjs`): - -- `suggestedGlob` is derived from the denied path's parent directory - (`dirname/**`). -- `alternativeTasks` lists up to 3 other manifests that already cover the - denied path. -- Protected denials recommend `cancel` by default — the user must - explicitly opt into `bootstrap` via the "something else" free-text - fallback if they want to unlock the system. - -Builders and tests live alongside the scope library: - -``` -agent-scope/lib/denial.mjs # the builders -agent-scope/lib/denial.test.mjs # 40 unit tests +pnpm task start # AI-guided onboarding (normal flow) +pnpm task show # what's active and what it covers +pnpm task list # all tasks, * marks active +pnpm task set # switch to an existing task +pnpm task check # will this file be allowed? +pnpm task audit # recent denials +pnpm task clear # turn protection off ``` -No special tokens or APIs — the manifest is the source of truth; edit it to -grant permission. Changes to a manifest still go through normal review. +## Editing agent-scope itself -## Debug / audit +The files that run the guard are permanently off-limits to the agent — +otherwise it could disable itself. To edit them, drop a token: ```bash -pnpm task resolve # how was the active task resolved? -pnpm task audit # recent denials -pnpm task validate # check all manifests -tail -f agent-scope/logs/denials.jsonl +touch agent-scope/.bootstrap-token # unlock +rm agent-scope/.bootstrap-token # lock again ``` From 82fa43ce75439ba82511e4945005ab76ee5118c4 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 14:36:06 +0200 Subject: [PATCH 16/21] clean the code --- agent-scope/tasks/staking.json | 20 -------------------- agent-scope/tasks/sync-refactor.json | 12 ------------ 2 files changed, 32 deletions(-) delete mode 100644 agent-scope/tasks/staking.json delete mode 100644 agent-scope/tasks/sync-refactor.json diff --git a/agent-scope/tasks/staking.json b/agent-scope/tasks/staking.json deleted file mode 100644 index 8d22bb07a..000000000 --- a/agent-scope/tasks/staking.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "id": "staking", - "description": "Example: work on the staking contracts and chain adapter bindings", - "owner": "unassigned", - "inherits": ["base"], - "allowed": [ - "packages/evm-module/contracts/Staking.sol", - "packages/evm-module/contracts/StakingKPI.sol", - "packages/evm-module/contracts/storage/StakingStorage.sol", - "packages/evm-module/contracts/storage/DelegatorsInfo.sol", - "packages/evm-module/contracts/storage/ConvictionStakingStorage.sol", - "packages/evm-module/test/**staking*", - "packages/evm-module/deploy/**staking*", - "packages/chain/src/**staking*" - ], - "exemptions": [ - "**/artifacts/**", - "**/cache/**" - ] -} diff --git a/agent-scope/tasks/sync-refactor.json b/agent-scope/tasks/sync-refactor.json deleted file mode 100644 index a3840fcc6..000000000 --- a/agent-scope/tasks/sync-refactor.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "id": "sync-refactor", - "description": "Example: refactor the peer sync protocol (/dkg/sync/1.0.0)", - "owner": "unassigned", - "inherits": ["base"], - "allowed": [ - "packages/agent/src/**sync*", - "packages/core/src/**sync*", - "packages/publisher/src/**sync*", - "packages/*/test/**sync*" - ] -} From db059b3a4af111a6e7aba724519569756dda3186 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 14:51:17 +0200 Subject: [PATCH 17/21] update --- .cursor/hooks.json | 8 +++ .cursor/hooks/stop.mjs | 76 ++++++++++++++++++++++++++++ agent-scope/lib/check-agent.mjs | 1 + agent-scope/lib/check-agent.test.mjs | 4 +- agent-scope/lib/onboarding.mjs | 17 +++++-- 5 files changed, 99 insertions(+), 7 deletions(-) create mode 100755 .cursor/hooks/stop.mjs diff --git a/.cursor/hooks.json b/.cursor/hooks.json index ebff0e2b0..912bdf1a9 100644 --- a/.cursor/hooks.json +++ b/.cursor/hooks.json @@ -36,6 +36,14 @@ "failClosed": false, "timeout": 10 } + ], + "stop": [ + { + "command": ".cursor/hooks/stop.mjs", + "failClosed": false, + "timeout": 5, + "loop_limit": 1 + } ] } } diff --git a/.cursor/hooks/stop.mjs b/.cursor/hooks/stop.mjs new file mode 100755 index 000000000..c98885a49 --- /dev/null +++ b/.cursor/hooks/stop.mjs @@ -0,0 +1,76 @@ +#!/usr/bin/env node +// Cursor `stop` hook. Fires when the agent finishes an assistant turn. +// +// Purpose: the onboarding last-ditch trigger for existing chats. Cursor's +// hook API has no equivalent of Claude Code's `UserPromptSubmit`, so when +// the user sends a purely conversational message ("hi", "start working") +// in an existing chat AND the agent replies without calling a tool, +// neither `sessionStart` nor `postToolUse` fires — the pending-onboarding +// marker sits there untouched and the agent never learns about it. +// +// This hook closes that gap. When the agent's reply finishes and a +// marker is still pending (and no task is active yet), we auto-submit +// the onboarding trigger as the next user message via `followup_message`. +// Cursor then feeds that as the next user turn, so the agent pivots to +// the Task onboarding protocol on its very next reply. +// +// Cost: one generic agent reply before onboarding kicks in. +// Benefit: no silent-failure case anymore, regardless of whether the +// user starts a new chat or reuses an existing one. +// +// One-shot: consumeOnboardingMarker reads + deletes atomically, so the +// followup fires exactly once per `pnpm task start`. The `loop_limit: 1` +// setting in hooks.json is a belt-and-suspenders cap. + +import { readFileSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath, pathToFileURL } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const __dirname = dirname(__filename); + +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; + +const { resolveRepoRoot, resolveActiveTaskId, checkNodeVersion } = await import(scopeUrl); +const { consumeOnboardingMarker } = await import(onboardUrl); + +try { checkNodeVersion(); } catch (e) { + process.stderr.write(e.message + '\n'); + process.stdout.write('{}'); + process.exit(0); +} + +function emit(obj) { + process.stdout.write(JSON.stringify(obj || {})); + process.exit(0); +} + +function readStdin() { + try { return readFileSync(0, 'utf8'); } catch { return ''; } +} + +async function main() { + // The stop hook receives {status, loop_count, ...}. We don't care about + // it — we only act when a pending-onboarding marker is still there. + readStdin(); + + const root = resolveRepoRoot(); + const { id: taskId } = resolveActiveTaskId(root); + + // Active task → onboarding already happened (or irrelevant). Nothing to do. + if (taskId) return emit({}); + + const payload = consumeOnboardingMarker(root); + if (!payload) return emit({}); + + // Cursor will auto-submit `followup_message` as the next user message. + // The payload already contains the full onboarding protocol + the user's + // task description, so the agent's next turn has everything it needs. + return emit({ followup_message: payload }); +} + +main().catch(err => { + process.stderr.write(`stop hook error: ${err?.message || err}\n`); + emit({}); +}); diff --git a/agent-scope/lib/check-agent.mjs b/agent-scope/lib/check-agent.mjs index 2e6900854..8480f00d1 100644 --- a/agent-scope/lib/check-agent.mjs +++ b/agent-scope/lib/check-agent.mjs @@ -64,6 +64,7 @@ function cursorAgent(root) { 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs', + 'stop.mjs', ]; for (const f of requiredHooks) { const p = resolve(hooksDir, f); diff --git a/agent-scope/lib/check-agent.test.mjs b/agent-scope/lib/check-agent.test.mjs index c43ad4159..7c1ffe0d4 100644 --- a/agent-scope/lib/check-agent.test.mjs +++ b/agent-scope/lib/check-agent.test.mjs @@ -40,7 +40,7 @@ test('detectAgents: full Cursor wiring → ok', () => { mkdirSync(join(root, '.cursor/rules'), { recursive: true }); writeFileSync(join(root, '.cursor/hooks.json'), '{}'); writeFileSync(join(root, '.cursor/rules/agent-scope.mdc'), ''); - for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs']) { + for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs', 'stop.mjs']) { touchHook(root, '.cursor', f); } const cursor = detectAgents(root).find(a => a.name === 'Cursor'); @@ -54,7 +54,7 @@ test('detectAgents: Cursor hook not executable → warn', () => { mkdirSync(join(root, '.cursor/rules'), { recursive: true }); writeFileSync(join(root, '.cursor/hooks.json'), '{}'); writeFileSync(join(root, '.cursor/rules/agent-scope.mdc'), ''); - for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs']) { + for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs', 'stop.mjs']) { touchHook(root, '.cursor', f); } chmodSync(join(root, '.cursor/hooks/scope-guard.mjs'), 0o644); diff --git a/agent-scope/lib/onboarding.mjs b/agent-scope/lib/onboarding.mjs index 996282c58..ba915bb56 100644 --- a/agent-scope/lib/onboarding.mjs +++ b/agent-scope/lib/onboarding.mjs @@ -14,13 +14,20 @@ // description, so the agent does not need to ask "describe the // task" again. // 2. The user sends any message in any chat. -// 3. THREE parallel consumers pick up the marker — whichever runs first +// 3. Multiple parallel consumers pick up the marker — whichever runs first // wins, because consume is atomic (read-and-delete): // -// (a) `sessionStart` hook — fires on a brand new chat. -// (b) `postToolUse` hook — fires after any tool call in an existing -// chat (Cursor + Claude Code). -// (c) The AGENT ITSELF — the always-applied rule requires a +// (a) `sessionStart` hook — fires on a brand new chat. +// (b) `UserPromptSubmit` hook — Claude Code only, fires BEFORE each +// user prompt reaches the agent (most reliable path). +// (c) `postToolUse` hook — fires after any tool call the agent +// makes (Cursor + Claude Code). +// (d) `stop` hook — Cursor only, fires when the agent +// finishes a turn. If the marker is still pending, returns it as +// `followup_message` so Cursor auto-submits it as the next user +// message. This is the safety net for existing Cursor chats where +// the agent replied with plain text and never triggered (c). +// (e) The AGENT ITSELF — the always-applied rule requires a // top-of-turn marker check so even pure conversational messages // (e.g. "hi") consume the marker correctly. // From 45ad6aae54be2b98b873403f4230c7d4352303d1 Mon Sep 17 00:00:00 2001 From: Bojan Date: Wed, 22 Apr 2026 17:17:41 +0200 Subject: [PATCH 18/21] update --- .claude/hooks/post-tool-use.mjs | 21 ++++++----- .cursor/hooks/post-tool-use.mjs | 38 ++++++++++++++++---- agent-scope/bin/task.mjs | 10 ++++++ agent-scope/lib/onboarding.mjs | 56 +++++++++++++++++++++-------- agent-scope/lib/onboarding.test.mjs | 44 +++++++++++++++++++++++ 5 files changed, 139 insertions(+), 30 deletions(-) diff --git a/.claude/hooks/post-tool-use.mjs b/.claude/hooks/post-tool-use.mjs index ffbb10d87..53dfdffd1 100755 --- a/.claude/hooks/post-tool-use.mjs +++ b/.claude/hooks/post-tool-use.mjs @@ -1,12 +1,17 @@ #!/usr/bin/env node // Claude Code PostToolUse hook (any tool except Bash, which has its own -// shell-diff-check). Sole purpose: detect a pending onboarding marker -// (written by `pnpm task start`) and inject the trigger as additional -// context. One-shot via consumeOnboardingMarker. +// shell-diff-check). Purpose: if a pending onboarding marker exists +// (written by `pnpm task start`), inject the trigger as additional +// context. READ-ONLY — does NOT delete the marker. // -// In Claude Code we ALSO have UserPromptSubmit (see user-prompt-submit.mjs) -// which catches the marker before any tool runs — this hook is the -// belt-and-suspenders for cases where the agent acts on a tool first. +// In Claude Code, `UserPromptSubmit` (see user-prompt-submit.mjs) is the +// authoritative consumer — it fires BEFORE the agent sees a prompt, so +// there is no race. This hook is kept as a best-effort mid-turn +// injection for Claude Code flows where a tool call happens before +// UserPromptSubmit delivered anything (edge case). +// +// Peek semantics mirror the Cursor hook. See its header for the full +// rationale and the list of authoritative deleters. import { readFileSync } from 'node:fs'; import { resolve, dirname } from 'node:path'; @@ -19,7 +24,7 @@ const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; const { resolveRepoRoot, resolveActiveTaskId, checkNodeVersion } = await import(scopeUrl); -const { consumeOnboardingMarker } = await import(onboardUrl); +const { readOnboardingMarker } = await import(onboardUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -44,7 +49,7 @@ async function main() { if (taskId) return emit({}); - const payload = consumeOnboardingMarker(root); + const payload = readOnboardingMarker(root); if (!payload) return emit({}); return emit({ diff --git a/.cursor/hooks/post-tool-use.mjs b/.cursor/hooks/post-tool-use.mjs index c17baa737..bd39aaa4c 100755 --- a/.cursor/hooks/post-tool-use.mjs +++ b/.cursor/hooks/post-tool-use.mjs @@ -1,12 +1,36 @@ #!/usr/bin/env node // Cursor postToolUse hook. Fires after every tool the agent calls. // -// Sole purpose: detect a pending `agent-scope/.pending-onboarding` marker -// (written by `pnpm task start`) and inject the onboarding trigger as -// `additional_context`. One-shot: `consumeOnboardingMarker` reads-and-deletes -// atomically, so the trigger fires exactly once even across many tool calls. +// Purpose: if a pending `agent-scope/.pending-onboarding` marker exists +// (written by `pnpm task start`), inject its content as +// `additional_context`. READ-ONLY — does NOT delete the marker. // -// No deny, no permission gating. Purely additive. +// Why peek and not consume? +// Earlier versions did read-and-delete here. That created a nasty race in +// existing Cursor chats: the first tool call the agent made (for any +// reason) deleted the marker, but Cursor's `additional_context` from +// postToolUse did not reliably land in the current turn's visible +// context. Net result: marker gone, agent never saw the payload, agent +// reports "something was here but I can't see it". +// +// New lifecycle — the marker persists until one of these authoritative +// consumers runs: +// - `sessionStart` hook (new chat: delete + inject once) +// - `stop` hook (end-of-turn in existing Cursor chat: +// delete + re-submit as next user +// message via followup_message, which +// IS Cursor-guaranteed) +// - `pnpm task create --activate` (success = "I processed it") +// - `pnpm task clear` (user abandons) +// +// This hook stays as the fast-path best-effort injection: if Cursor DOES +// stitch additional_context into the current turn, the agent reacts +// immediately. If it doesn't, `stop` is the safety net. Either way the +// marker survives until an authoritative deleter runs. +// +// Re-injection noise (same payload on every tool call) is harmless — +// additional_context is internal, never shown to the user, and the +// onboarding protocol is idempotent. import { readFileSync } from 'node:fs'; import { resolve, dirname } from 'node:path'; @@ -19,7 +43,7 @@ const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scop const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; const { resolveRepoRoot, resolveActiveTaskId, checkNodeVersion } = await import(scopeUrl); -const { consumeOnboardingMarker } = await import(onboardUrl); +const { readOnboardingMarker } = await import(onboardUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -44,7 +68,7 @@ async function main() { if (taskId) return emit({}); - const payload = consumeOnboardingMarker(root); + const payload = readOnboardingMarker(root); if (!payload) return emit({}); return emit({ additional_context: payload }); diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs index bd3859a8c..766bcd5d9 100755 --- a/agent-scope/bin/task.mjs +++ b/agent-scope/bin/task.mjs @@ -15,6 +15,7 @@ import { import { buildOnboardingTrigger, writeOnboardingMarker, + deleteOnboardingMarker, copyToClipboard, } from '../lib/onboarding.mjs'; import { detectAgents, statusGlyph, summary } from '../lib/check-agent.mjs'; @@ -97,6 +98,10 @@ function set(id) { function clear() { if (existsSync(activeFile)) unlinkSync(activeFile); + // Also clear any pending onboarding marker — if the user ran `pnpm + // task start` and then abandons the flow, the marker should not + // linger and keep triggering onboarding on future messages. + deleteOnboardingMarker(root); console.log('Active task cleared. Writes are unrestricted (except for protected paths).'); } @@ -341,6 +346,11 @@ function create(argv) { if (opts.activate) { loadTask(root, opts.id); writeFileSync(activeFile, `${opts.id}\n`, 'utf8'); + // A successful activate = "the agent processed the onboarding + // description and is now working." The pending-onboarding marker + // has served its purpose and must be cleared so future turns don't + // keep re-injecting the onboarding protocol into context. + deleteOnboardingMarker(root); console.log(`Active task set: ${opts.id}`); } else { console.log(`Activate with: pnpm task set ${opts.id}`); diff --git a/agent-scope/lib/onboarding.mjs b/agent-scope/lib/onboarding.mjs index ba915bb56..635b4f78e 100644 --- a/agent-scope/lib/onboarding.mjs +++ b/agent-scope/lib/onboarding.mjs @@ -14,24 +14,36 @@ // description, so the agent does not need to ask "describe the // task" again. // 2. The user sends any message in any chat. -// 3. Multiple parallel consumers pick up the marker — whichever runs first -// wins, because consume is atomic (read-and-delete): +// 3. The marker is delivered to the agent via two kinds of consumers — +// AUTHORITATIVE (read-and-delete) and BEST-EFFORT PEEK (read-only). +// Splitting them this way avoids a race where a mid-turn peek would +// otherwise delete the marker before the agent's visible context +// picked it up. // +// Authoritative (delete + inject): // (a) `sessionStart` hook — fires on a brand new chat. // (b) `UserPromptSubmit` hook — Claude Code only, fires BEFORE each -// user prompt reaches the agent (most reliable path). -// (c) `postToolUse` hook — fires after any tool call the agent -// makes (Cursor + Claude Code). -// (d) `stop` hook — Cursor only, fires when the agent -// finishes a turn. If the marker is still pending, returns it as -// `followup_message` so Cursor auto-submits it as the next user -// message. This is the safety net for existing Cursor chats where -// the agent replied with plain text and never triggered (c). -// (e) The AGENT ITSELF — the always-applied rule requires a -// top-of-turn marker check so even pure conversational messages -// (e.g. "hi") consume the marker correctly. +// user prompt reaches the agent. No race. +// (c) `stop` hook — Cursor only, fires at end of a +// turn. Returns the payload as `followup_message`, which +// Cursor auto-submits as the next user message. This is the +// safety net for existing Cursor chats where the agent +// replied conversationally with no tool call. // -// 4. The agent follows the "Task onboarding protocol" (CLAUDE.md, +// Best-effort peek (read-only, NO delete): +// (d) `postToolUse` hook — Cursor + Claude Code. Fires after +// any tool call. Fast-path injection via `additional_context`. +// Does NOT delete so mid-turn injection noise is harmless and +// the authoritative consumers remain in control of the +// lifecycle. +// +// 4. The marker is explicitly deleted by the lifecycle owners once the +// flow is resolved: +// - `pnpm task create --activate` (success = "I processed this") +// - `pnpm task clear` (user abandons the flow) +// This is the final cleanup step. +// +// 5. The agent follows the "Task onboarding protocol" (CLAUDE.md, // .cursor/rules/agent-scope.mdc, AGENTS.md, GEMINI.md). // // Zero runtime deps. Pure-ish (spawnSync for clipboard; filesystem for marker). @@ -160,7 +172,9 @@ export function readOnboardingMarker(root) { } catch { return null; } } -// Read-and-delete. Used by hooks so the trigger fires exactly once. +// Read-and-delete. Used by AUTHORITATIVE consumers only (sessionStart, +// stop, UserPromptSubmit). The postToolUse peek-hooks do NOT use this — +// see `.cursor/hooks/post-tool-use.mjs` for the race rationale. export function consumeOnboardingMarker(root) { const p = onboardingMarkerPath(root); try { @@ -171,6 +185,18 @@ export function consumeOnboardingMarker(root) { } catch { return null; } } +// Idempotent delete. Used by `pnpm task create --activate` and +// `pnpm task clear` to clean up a pending marker once the flow is +// resolved (task activated or abandoned). No-op if the marker is absent. +export function deleteOnboardingMarker(root) { + const p = onboardingMarkerPath(root); + try { + if (!existsSync(p)) return false; + try { unlinkSync(p); } catch { try { rmSync(p, { force: true }); } catch {} } + return true; + } catch { return false; } +} + // --------------------------------------------------------------------------- // Cross-platform clipboard copy (best-effort) // --------------------------------------------------------------------------- diff --git a/agent-scope/lib/onboarding.test.mjs b/agent-scope/lib/onboarding.test.mjs index a0c9c6aab..cbd50f0aa 100644 --- a/agent-scope/lib/onboarding.test.mjs +++ b/agent-scope/lib/onboarding.test.mjs @@ -18,6 +18,7 @@ import { hasOnboardingMarker, readOnboardingMarker, consumeOnboardingMarker, + deleteOnboardingMarker, copyToClipboard, } from './onboarding.mjs'; @@ -166,6 +167,49 @@ test('marker: consumeOnboardingMarker on missing file returns null without throw } finally { cleanup(root); } }); +test('marker: readOnboardingMarker is read-only — does NOT delete (peek semantics)', () => { + // This is the critical invariant for postToolUse peek hooks. If this + // regresses, existing-chat onboarding in Cursor breaks again because + // the marker gets deleted mid-turn before the agent sees it. + const root = mkRoot(); + try { + writeOnboardingMarker(root, 'peek me'); + assert.equal(readOnboardingMarker(root), 'peek me'); + assert.ok(existsSync(onboardingMarkerPath(root)), 'marker must survive a read'); + // Repeated reads must keep returning the payload until someone + // authoritative deletes it. + assert.equal(readOnboardingMarker(root), 'peek me'); + assert.equal(readOnboardingMarker(root), 'peek me'); + assert.ok(existsSync(onboardingMarkerPath(root))); + } finally { cleanup(root); } +}); + +test('marker: deleteOnboardingMarker removes the file and returns true', () => { + const root = mkRoot(); + try { + writeOnboardingMarker(root, 'bye'); + assert.equal(deleteOnboardingMarker(root), true); + assert.equal(existsSync(onboardingMarkerPath(root)), false); + } finally { cleanup(root); } +}); + +test('marker: deleteOnboardingMarker on missing file is a no-op returning false', () => { + const root = mkRoot(); + try { + assert.equal(deleteOnboardingMarker(root), false); + } finally { cleanup(root); } +}); + +test('marker: delete is idempotent (safe to call twice)', () => { + const root = mkRoot(); + try { + writeOnboardingMarker(root, 'x'); + assert.equal(deleteOnboardingMarker(root), true); + assert.equal(deleteOnboardingMarker(root), false); + assert.equal(existsSync(onboardingMarkerPath(root)), false); + } finally { cleanup(root); } +}); + test('copyToClipboard returns a structured result (never throws)', () => { const result = copyToClipboard('test payload'); assert.ok(result && typeof result === 'object'); From 928494839f34fc339a14e4fcfa55609043caeeb1 Mon Sep 17 00:00:00 2001 From: Bojan Date: Mon, 27 Apr 2026 14:02:45 +0200 Subject: [PATCH 19/21] update system to use dkg --- .claude/hooks/post-tool-use.mjs | 66 -- .claude/hooks/session-start.mjs | 115 +-- .claude/hooks/shell-diff-check.mjs | 54 +- .claude/hooks/user-prompt-submit.mjs | 58 +- .claude/settings.json | 5 - .cursor/hooks.json | 17 +- .cursor/hooks/post-tool-use.mjs | 80 -- .cursor/hooks/session-start.mjs | 129 +-- .cursor/hooks/shell-diff-check.mjs | 53 +- .cursor/hooks/stop.mjs | 76 -- .cursor/rules/agent-scope.mdc | 299 ++----- .cursorrules | 42 +- .gitignore | 5 +- AGENTS.md | 416 +++------ CLAUDE.md | 386 +++------ GEMINI.md | 29 +- agent-scope/README.md | 100 ++- agent-scope/bin/task.mjs | 494 ----------- agent-scope/lib/check-agent.mjs | 71 +- agent-scope/lib/check-agent.test.mjs | 24 +- agent-scope/lib/denial.mjs | 309 ++++--- agent-scope/lib/denial.test.mjs | 799 ++++-------------- agent-scope/lib/dkg-source.mjs | 383 +++++++++ agent-scope/lib/dkg-source.test.mjs | 203 +++++ agent-scope/lib/onboarding.mjs | 235 ------ agent-scope/lib/onboarding.test.mjs | 228 ----- agent-scope/lib/prompter.mjs | 171 ---- agent-scope/lib/prompter.test.mjs | 221 ----- agent-scope/lib/scope.mjs | 362 ++++---- agent-scope/lib/scope.test.mjs | 445 +++------- agent-scope/lib/shell-parse.mjs | 103 --- agent-scope/lib/shell-parse.test.mjs | 146 +--- agent-scope/schema/task.schema.json | 48 -- agent-scope/tasks/base.json | 20 - package.json | 7 +- packages/mcp-dkg/hooks/capture-chat.mjs | 1 + packages/mcp-dkg/src/tools/annotations.ts | 9 +- packages/mcp-dkg/src/tools/writes.ts | 190 ++++- .../ontologies/coding-project/agent-guide.md | 65 +- .../ontologies/coding-project/ontology.ttl | 35 +- 40 files changed, 2144 insertions(+), 4355 deletions(-) delete mode 100755 .claude/hooks/post-tool-use.mjs delete mode 100755 .cursor/hooks/post-tool-use.mjs delete mode 100755 .cursor/hooks/stop.mjs delete mode 100755 agent-scope/bin/task.mjs create mode 100644 agent-scope/lib/dkg-source.mjs create mode 100644 agent-scope/lib/dkg-source.test.mjs delete mode 100644 agent-scope/lib/onboarding.mjs delete mode 100644 agent-scope/lib/onboarding.test.mjs delete mode 100644 agent-scope/lib/prompter.mjs delete mode 100644 agent-scope/lib/prompter.test.mjs delete mode 100644 agent-scope/schema/task.schema.json delete mode 100644 agent-scope/tasks/base.json diff --git a/.claude/hooks/post-tool-use.mjs b/.claude/hooks/post-tool-use.mjs deleted file mode 100755 index 53dfdffd1..000000000 --- a/.claude/hooks/post-tool-use.mjs +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/env node -// Claude Code PostToolUse hook (any tool except Bash, which has its own -// shell-diff-check). Purpose: if a pending onboarding marker exists -// (written by `pnpm task start`), inject the trigger as additional -// context. READ-ONLY — does NOT delete the marker. -// -// In Claude Code, `UserPromptSubmit` (see user-prompt-submit.mjs) is the -// authoritative consumer — it fires BEFORE the agent sees a prompt, so -// there is no race. This hook is kept as a best-effort mid-turn -// injection for Claude Code flows where a tool call happens before -// UserPromptSubmit delivered anything (edge case). -// -// Peek semantics mirror the Cursor hook. See its header for the full -// rationale and the list of authoritative deleters. - -import { readFileSync } from 'node:fs'; -import { resolve, dirname } from 'node:path'; -import { fileURLToPath, pathToFileURL } from 'node:url'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; -const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; - -const { resolveRepoRoot, resolveActiveTaskId, checkNodeVersion } = await import(scopeUrl); -const { readOnboardingMarker } = await import(onboardUrl); - -try { checkNodeVersion(); } catch (e) { - process.stderr.write(e.message + '\n'); - process.stdout.write('{}'); - process.exit(0); -} - -function emit(obj) { - process.stdout.write(JSON.stringify(obj || {})); - process.exit(0); -} - -function readStdin() { - try { return readFileSync(0, 'utf8'); } catch { return ''; } -} - -async function main() { - readStdin(); - - const root = resolveRepoRoot(); - const { id: taskId } = resolveActiveTaskId(root); - - if (taskId) return emit({}); - - const payload = readOnboardingMarker(root); - if (!payload) return emit({}); - - return emit({ - hookSpecificOutput: { - hookEventName: 'PostToolUse', - additionalContext: payload, - }, - }); -} - -main().catch(err => { - process.stderr.write(`post-tool-use hook error: ${err?.message || err}\n`); - emit({}); -}); diff --git a/.claude/hooks/session-start.mjs b/.claude/hooks/session-start.mjs index 9bebbfe45..7a7d636f0 100755 --- a/.claude/hooks/session-start.mjs +++ b/.claude/hooks/session-start.mjs @@ -1,7 +1,9 @@ #!/usr/bin/env node // Claude Code SessionStart hook. Mirrors the Cursor sessionStart hook: -// injects the active task's scope (or the onboarding trigger, or a -// bootstrap warning) into the agent's initial context. +// injects the active scope (or a bootstrap warning) into the agent's +// initial context. Source of truth is the local DKG daemon — the union +// of `tasks:scopedToPath` across every `tasks:Task` whose status is +// `in_progress` and which is `prov:wasAttributedTo` this agent. import { readFileSync } from 'node:fs'; import { resolve, dirname } from 'node:path'; @@ -11,11 +13,9 @@ const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; -const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; const { - resolveRepoRoot, resolveActiveTaskId, loadTask, checkNodeVersion, isBootstrapActive, + resolveRepoRoot, resolveActiveScope, checkNodeVersion, isBootstrapActive, } = await import(scopeUrl); -const { consumeOnboardingMarker } = await import(onboardUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -41,15 +41,10 @@ function readStdin() { async function main() { readStdin(); const root = resolveRepoRoot(); - const { id: taskId, source } = resolveActiveTaskId(root); + const scope = await resolveActiveScope({ root, force: true }); const bootstrap = isBootstrapActive(root); - const onboarding = !taskId ? consumeOnboardingMarker(root) : null; - const header = []; - if (onboarding) { - header.push(onboarding, ''); - } if (bootstrap) { header.push( '# agent-scope: BOOTSTRAP MODE ACTIVE', @@ -64,73 +59,87 @@ async function main() { ); } - if (!taskId) { - if (!bootstrap && !onboarding) return emit(null); - if (onboarding && !bootstrap) return emit(header.join('\n').trim()); + if (scope.reason !== 'ok') { + if (!bootstrap) { + if (scope.reason === 'daemon-unreachable' || scope.reason === 'configuration-error') { + return emit([ + '# agent-scope: scope source unavailable', + '', + `Scope can't be resolved right now (${scope.reason}). Only the hardcoded`, + 'protected path list is enforced; everything else is writable.', + scope.diagnostic ? '' : null, + scope.diagnostic ? `Diagnostic: ${scope.diagnostic}` : null, + ].filter((l) => l !== null).join('\n')); + } + return emit(null); + } return emit(header.concat([ - '# agent-scope: no active task', + '# agent-scope: no in-progress task', '', - 'Bootstrap is active but no task is set. System files are currently', - 'writable. When you finish the protected work, remove the token:', + 'Bootstrap is active but no `tasks:Task` is currently in_progress for this', + 'agent. System files are writable. When the protected work is done, run:', ' rm agent-scope/.bootstrap-token', ]).join('\n')); } - let task; - try { task = loadTask(root, taskId); } - catch (e) { - return emit(header.concat([ - `# agent-scope: ACTIVE TASK MANIFEST BROKEN (${taskId})`, - '', - `The manifest at agent-scope/tasks/${taskId}.json failed to load:`, - ` ${e.message}`, - '', - 'All writes will be denied until this is fixed. STOP and report this to the user.', - ]).join('\n')); - } + const tasks = Array.isArray(scope.tasks) ? scope.tasks : []; + const allowedPositive = (scope.allowed || []).filter((p) => !p.startsWith('!')); + const allowedNegative = (scope.allowed || []).filter((p) => p.startsWith('!')); + const exemptionsPositive = (scope.exemptions || []).filter((p) => !p.startsWith('!')); + const exemptionsNegative = (scope.exemptions || []).filter((p) => p.startsWith('!')); - const allowedPositive = (task.allowed || []).filter(p => !p.startsWith('!')); - const allowedNegative = (task.allowed || []).filter(p => p.startsWith('!')); - const exemptionsPositive = (task.exemptions || []).filter(p => !p.startsWith('!')); - const exemptionsNegative = (task.exemptions || []).filter(p => p.startsWith('!')); + const heading = tasks.length === 1 + ? `# agent-scope: active task — ${tasks[0].uri}` + : `# agent-scope: ${tasks.length} active in-progress tasks`; - const lines = header.concat([ - `# agent-scope: active task — ${task.id}`, - '', - `**Description:** ${task.description || '(none)'}`, - task.owner ? `**Owner:** ${task.owner}` : null, - `**Resolved from:** ${source}`, - task.__inheritedFrom && task.__inheritedFrom.length ? `**Inherits from:** ${task.__inheritedFrom.join(', ')}` : null, - '', - '## You may modify files matching:', - ...(allowedPositive.length ? allowedPositive.map(p => `- \`${p}\``) : ['- (nothing)']), - ]); + const lines = header.concat([heading, '']); + if (tasks.length === 1) { + const t = tasks[0]; + lines.push(`**Task:** ${t.title || '(untitled)'}`); + if (t.assignee) lines.push(`**Assignee:** ${t.assignee}`); + } else { + lines.push('## In-progress tasks'); + for (const t of tasks) { + lines.push(`- \`${t.uri}\` — ${t.title || '(untitled)'}`); + } + } + if (scope.agentUri) lines.push(`**Agent:** ${scope.agentUri}`); + if (scope.projectId) lines.push(`**Project:** ${scope.projectId}`); + lines.push(''); + + lines.push( + '## You may modify files matching the union of these globs:', + ...(allowedPositive.length ? allowedPositive.map((p) => `- \`${p}\``) : ['- (nothing — every in-progress task has empty `tasks:scopedToPath`)']), + ); if (exemptionsPositive.length) { - lines.push('', '## Always allowed (build artifacts, lockfiles):'); + lines.push('', '## Always allowed (build artefacts, lockfiles):'); for (const p of exemptionsPositive) lines.push(`- \`${p}\``); } if (allowedNegative.length || exemptionsNegative.length) { lines.push('', '## Explicitly denied (even if they look in-scope):'); for (const p of [...allowedNegative, ...exemptionsNegative]) lines.push(`- \`${p}\``); } - if (task.notes) { - lines.push('', '## Task notes', task.notes); - } + lines.push( '', '## Rules', '- You may **read** any file in the repo.', '- You may **write** only files matching the patterns above.', '- System files (`.cursor/hooks/**`, `.claude/hooks/**`, `agent-scope/lib/**`, etc.) are hardcode-protected regardless of task.' + (bootstrap ? ' (currently bypassed by bootstrap mode)' : ''), - '- If you believe an out-of-scope file must be changed for this task, STOP and ask the user for explicit approval. The user will grant approval by editing the manifest.', - '- A Claude Code hook enforces this on every Write/Edit/Delete. A pre-Bash hook blocks destructive shell commands on denied paths. A post-Bash hook reverts anything that slipped through.', - '- To clear or switch tasks, ask the user — do not edit `agent-scope/active` yourself.', + '- The allow-list is computed live from the local DKG daemon. To extend scope:', + ' call `dkg_add_task` with `status: "in_progress"` and a `scopedToPath` glob covering', + ' the new path; the cache will pick it up within ~5s.', + '- When a task is done, call `dkg_update_task_status({ taskUri, status: "done" })`.', + ' The next scope read will drop its globs from the union automatically.', + '- A Claude Code hook enforces this on every Write/Edit/Delete; pre-Bash blocks', + ' destructive shell commands on denied paths; post-Bash reverts anything that', + ' slipped through.', ); - emit(lines.filter(l => l !== null).join('\n')); + emit(lines.filter((l) => l !== null).join('\n')); } -main().catch(err => { +main().catch((err) => { process.stderr.write(`session-start hook error: ${err?.message || err}\n`); emit(null); }); diff --git a/.claude/hooks/shell-diff-check.mjs b/.claude/hooks/shell-diff-check.mjs index 136c10359..c2932d369 100755 --- a/.claude/hooks/shell-diff-check.mjs +++ b/.claude/hooks/shell-diff-check.mjs @@ -9,6 +9,10 @@ // - out-of-task-scope, not protected → DELETED // - in-scope or exempt → left alone // +// Source of truth for "in-scope" is the local DKG daemon — the union of +// `tasks:scopedToPath` across every `in_progress` task attributed to this +// agent. See agent-scope/lib/scope.mjs + dkg-source.mjs. +// // Output format: PostToolUse can return additional_context which becomes // part of the next agent turn's context (so the agent SEES that we // reverted its changes). @@ -24,13 +28,11 @@ const __dirname = dirname(__filename); const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; -const parseUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/shell-parse.mjs')).href; const { resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, checkNodeVersion, } = await import(scopeUrl); const { logDenial } = await import(logUrl); const { buildAfterShellContext } = await import(denialUrl); -const { extractTaskCreateId, approvedTaskCreateWrites } = await import(parseUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -78,60 +80,18 @@ async function main() { const root = resolveRepoRoot(); const { id: taskId } = resolveActiveTaskId(root); - - let task = null; - if (taskId) { try { task = loadTask(root, taskId); } catch { return emit({}); } } + const task = loadTask(root, taskId); const porcelain = gitPorcelain(root); if (porcelain === null) return emit({}); - // Approved-task-create allowlist: if the command that just ran was - // `pnpm task create ` (or the canonical node equivalent), allow - // the two specific files that command legitimately writes — - // agent-scope/tasks/.json - // agent-scope/active - // Every other protected-path write still gets reverted/deleted. - const approvedId = extractTaskCreateId(command); - const approvedWrites = approvedTaskCreateWrites(approvedId); - const approved = []; - - // Active-task state exemption: the currently active task's manifest and - // the `active` pointer file are legitimate persistent state, not - // collateral from the current command. Without this, a manifest created - // by an earlier `pnpm task create` gets reaped the next time ANY - // unrelated shell command runs (because it shows up as untracked in a - // protected path). Only shield the active-task id — every other - // manifest (including stale ones) is still reverted/deleted. - const activeTaskExemptions = new Set(); - if (taskId) { - activeTaskExemptions.add(`agent-scope/tasks/${taskId}.json`); - activeTaskExemptions.add('agent-scope/active'); - } - const entries = parsePorcelain(porcelain); const outOfScope = entries.filter(({ path }) => { if (!path) return false; const d = checkPath(task, path, root); - if (d !== 'deny' && d !== 'protected') return false; - if (approvedWrites.has(path)) { approved.push(path); return false; } - if (activeTaskExemptions.has(path)) return false; - return true; + return d === 'deny' || d === 'protected'; }); - if (approved.length) { - for (const p of approved) { - logDenial(root, { - event: 'afterShell.approved-create', - tool: 'Bash', - path: p, - task: approvedId, - command, - sessionId, - agent: 'claude-code', - }); - } - } - if (outOfScope.length === 0) return emit({}); const reverted = []; @@ -180,7 +140,7 @@ async function main() { }); } -main().catch(err => { +main().catch((err) => { process.stderr.write(`shell-diff-check error: ${err?.message || err}\n`); emit({}); }); diff --git a/.claude/hooks/user-prompt-submit.mjs b/.claude/hooks/user-prompt-submit.mjs index 04a6f099a..37c7209be 100755 --- a/.claude/hooks/user-prompt-submit.mjs +++ b/.claude/hooks/user-prompt-submit.mjs @@ -1,16 +1,9 @@ #!/usr/bin/env node // Claude Code UserPromptSubmit hook. Fires BEFORE the agent processes the -// user's message. This is the primary onboarding trigger in Claude Code -// because — unlike Cursor's beforeSubmitPrompt — Claude Code lets us -// inject additional_context here, so we get reliable transparent -// onboarding even for purely conversational messages ("hi") in any chat, -// new or existing. -// -// One-shot: consumeOnboardingMarker is atomic, so the trigger fires for -// exactly one user message after `pnpm task start`. -// -// We ALSO surface the bootstrap warning here so the user/agent never -// forget bootstrap is on between turns. +// user's message. We use it to surface the bootstrap warning so the +// user/agent never forget bootstrap is on between turns. Onboarding is +// gone (the local task-manifest flow has been replaced by DKG-driven +// scope), so this hook now exists purely for the bootstrap reminder. import { readFileSync } from 'node:fs'; import { resolve, dirname } from 'node:path'; @@ -19,13 +12,8 @@ import { fileURLToPath, pathToFileURL } from 'node:url'; const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); -const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; -const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; - -const { - resolveRepoRoot, resolveActiveTaskId, checkNodeVersion, isBootstrapActive, -} = await import(scopeUrl); -const { consumeOnboardingMarker } = await import(onboardUrl); +const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; +const { resolveRepoRoot, checkNodeVersion, isBootstrapActive } = await import(scopeUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -45,39 +33,23 @@ function readStdin() { async function main() { readStdin(); const root = resolveRepoRoot(); - const { id: taskId } = resolveActiveTaskId(root); - const bootstrap = isBootstrapActive(root); - - // Active task → silent. The session-start hook already injected the - // active-task block; we don't want to re-inject it on every prompt. - if (taskId) return emit({}); - - // No active task → check for onboarding marker. - const onboarding = consumeOnboardingMarker(root); - - if (!onboarding && !bootstrap) return emit({}); - - const blocks = []; - if (onboarding) blocks.push(onboarding); - if (bootstrap) { - blocks.push([ - '# agent-scope: BOOTSTRAP MODE ACTIVE', - '', - 'Hardcoded path protection is currently DISABLED. Writes to system files', - 'are permitted. If you are not improving agent-scope itself, ask the user', - 'to run: rm agent-scope/.bootstrap-token', - ].join('\n')); - } + if (!isBootstrapActive(root)) return emit({}); emit({ hookSpecificOutput: { hookEventName: 'UserPromptSubmit', - additionalContext: blocks.join('\n\n'), + additionalContext: [ + '# agent-scope: BOOTSTRAP MODE ACTIVE', + '', + 'Hardcoded path protection is currently DISABLED. Writes to system files', + 'are permitted. If you are not improving agent-scope itself, ask the user', + 'to run: rm agent-scope/.bootstrap-token', + ].join('\n'), }, }); } -main().catch(err => { +main().catch((err) => { process.stderr.write(`user-prompt-submit hook error: ${err?.message || err}\n`); emit({}); }); diff --git a/.claude/settings.json b/.claude/settings.json index b7e1e6d89..960bd5696 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -34,11 +34,6 @@ "hooks": [ { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/shell-diff-check.mjs", "timeout": 10 } ] - }, - { - "hooks": [ - { "type": "command", "command": "$CLAUDE_PROJECT_DIR/.claude/hooks/post-tool-use.mjs", "timeout": 5 } - ] } ] } diff --git a/.cursor/hooks.json b/.cursor/hooks.json index a041f60df..683401a3e 100644 --- a/.cursor/hooks.json +++ b/.cursor/hooks.json @@ -1,6 +1,6 @@ { "version": 1, - "_comment": "Two purposes: (1) agent-scope hooks enforce task-scoped writes (sessionStart/preToolUse/postToolUse/beforeShellExecution/afterShellExecution/stop). (2) DKG chat-capture hooks (sessionStart/sessionEnd/beforeSubmitPrompt/afterAgentResponse via packages/mcp-dkg/hooks/capture-chat.mjs) auto-promote each conversation turn to the chat sub-graph of the project pinned in .dkg/config.yaml. See agent-scope/README.md and packages/mcp-dkg/README.md.", + "_comment": "Two purposes: (1) agent-scope hooks enforce DKG-derived scope on writes (sessionStart/preToolUse/beforeShellExecution/afterShellExecution). The active scope is the union of `tasks:scopedToPath` across in-progress `tasks:Task` entities attributed to this agent — see agent-scope/README.md. (2) DKG chat-capture hooks (sessionStart/sessionEnd/beforeSubmitPrompt/afterAgentResponse via packages/mcp-dkg/hooks/capture-chat.mjs) auto-promote each conversation turn to the chat sub-graph of the project pinned in .dkg/config.yaml.", "hooks": { "sessionStart": [ { @@ -39,13 +39,6 @@ "timeout": 5 } ], - "postToolUse": [ - { - "command": ".cursor/hooks/post-tool-use.mjs", - "failClosed": false, - "timeout": 5 - } - ], "beforeShellExecution": [ { "command": ".cursor/hooks/shell-precheck.mjs", @@ -59,14 +52,6 @@ "failClosed": false, "timeout": 10 } - ], - "stop": [ - { - "command": ".cursor/hooks/stop.mjs", - "failClosed": false, - "timeout": 5, - "loop_limit": 1 - } ] } } diff --git a/.cursor/hooks/post-tool-use.mjs b/.cursor/hooks/post-tool-use.mjs deleted file mode 100755 index bd39aaa4c..000000000 --- a/.cursor/hooks/post-tool-use.mjs +++ /dev/null @@ -1,80 +0,0 @@ -#!/usr/bin/env node -// Cursor postToolUse hook. Fires after every tool the agent calls. -// -// Purpose: if a pending `agent-scope/.pending-onboarding` marker exists -// (written by `pnpm task start`), inject its content as -// `additional_context`. READ-ONLY — does NOT delete the marker. -// -// Why peek and not consume? -// Earlier versions did read-and-delete here. That created a nasty race in -// existing Cursor chats: the first tool call the agent made (for any -// reason) deleted the marker, but Cursor's `additional_context` from -// postToolUse did not reliably land in the current turn's visible -// context. Net result: marker gone, agent never saw the payload, agent -// reports "something was here but I can't see it". -// -// New lifecycle — the marker persists until one of these authoritative -// consumers runs: -// - `sessionStart` hook (new chat: delete + inject once) -// - `stop` hook (end-of-turn in existing Cursor chat: -// delete + re-submit as next user -// message via followup_message, which -// IS Cursor-guaranteed) -// - `pnpm task create --activate` (success = "I processed it") -// - `pnpm task clear` (user abandons) -// -// This hook stays as the fast-path best-effort injection: if Cursor DOES -// stitch additional_context into the current turn, the agent reacts -// immediately. If it doesn't, `stop` is the safety net. Either way the -// marker survives until an authoritative deleter runs. -// -// Re-injection noise (same payload on every tool call) is harmless — -// additional_context is internal, never shown to the user, and the -// onboarding protocol is idempotent. - -import { readFileSync } from 'node:fs'; -import { resolve, dirname } from 'node:path'; -import { fileURLToPath, pathToFileURL } from 'node:url'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; -const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; - -const { resolveRepoRoot, resolveActiveTaskId, checkNodeVersion } = await import(scopeUrl); -const { readOnboardingMarker } = await import(onboardUrl); - -try { checkNodeVersion(); } catch (e) { - process.stderr.write(e.message + '\n'); - process.stdout.write('{}'); - process.exit(0); -} - -function emit(obj) { - process.stdout.write(JSON.stringify(obj || {})); - process.exit(0); -} - -function readStdin() { - try { return readFileSync(0, 'utf8'); } catch { return ''; } -} - -async function main() { - readStdin(); - - const root = resolveRepoRoot(); - const { id: taskId } = resolveActiveTaskId(root); - - if (taskId) return emit({}); - - const payload = readOnboardingMarker(root); - if (!payload) return emit({}); - - return emit({ additional_context: payload }); -} - -main().catch(err => { - process.stderr.write(`post-tool-use hook error: ${err?.message || err}\n`); - emit({}); -}); diff --git a/.cursor/hooks/session-start.mjs b/.cursor/hooks/session-start.mjs index 4ec97a5c9..04d6b7075 100755 --- a/.cursor/hooks/session-start.mjs +++ b/.cursor/hooks/session-start.mjs @@ -1,7 +1,10 @@ #!/usr/bin/env node -// Cursor sessionStart hook. Injects the active task's scope into the agent's +// Cursor sessionStart hook. Injects the active scope into the agent's // initial context so the agent knows what it may modify without having to -// hit a deny first. Also surfaces bootstrap-mode status. +// hit a deny first. Source of truth is now the local DKG daemon (the +// union of `tasks:scopedToPath` across every `tasks:Task` whose status is +// `in_progress` and which is `prov:wasAttributedTo` this agent). Also +// surfaces bootstrap-mode status. import { readFileSync } from 'node:fs'; import { resolve, dirname } from 'node:path'; @@ -11,11 +14,9 @@ const __filename = fileURLToPath(import.meta.url); const __dirname = dirname(__filename); const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; -const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; const { - resolveRepoRoot, resolveActiveTaskId, loadTask, checkNodeVersion, isBootstrapActive, + resolveRepoRoot, resolveActiveScope, checkNodeVersion, isBootstrapActive, } = await import(scopeUrl); -const { consumeOnboardingMarker } = await import(onboardUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -36,17 +37,13 @@ function readStdin() { async function main() { readStdin(); const root = resolveRepoRoot(); - const { id: taskId, source } = resolveActiveTaskId(root); - const bootstrap = isBootstrapActive(root); - // If the user ran `pnpm task start`, consume the marker (one-shot) and - // prepend the onboarding trigger to whatever else this hook would emit. - const onboarding = !taskId ? consumeOnboardingMarker(root) : null; + // Force a fresh daemon read at session start; subsequent hooks can + // hit the 5-s cache without surprise. + const scope = await resolveActiveScope({ root, force: true }); + const bootstrap = isBootstrapActive(root); const header = []; - if (onboarding) { - header.push(onboarding, ''); - } if (bootstrap) { header.push( '# agent-scope: BOOTSTRAP MODE ACTIVE', @@ -61,77 +58,91 @@ async function main() { ); } - if (!taskId) { - // No task + no bootstrap + no pending onboarding → the system is fully - // invisible. The agent behaves like agent-scope doesn't exist. - if (!bootstrap && !onboarding) return emit(null); - // Pending onboarding but no bootstrap → emit only the onboarding - // trigger so the agent's focus lands on the onboarding protocol. - if (onboarding && !bootstrap) return emit(header.join('\n').trim()); + // No active scope ⇒ stay invisible (and only surface bootstrap if it's on). + if (scope.reason !== 'ok') { + if (!bootstrap) { + // Diagnostic-only: if the daemon is actually unreachable or the + // workspace isn't configured, hint quietly so the user can fix it, + // but don't make this loud — agent-scope is opt-in. + if (scope.reason === 'daemon-unreachable' || scope.reason === 'configuration-error') { + return emit([ + '# agent-scope: scope source unavailable', + '', + `Scope can't be resolved right now (${scope.reason}). Only the hardcoded`, + 'protected path list is enforced; everything else is writable.', + scope.diagnostic ? '' : null, + scope.diagnostic ? `Diagnostic: ${scope.diagnostic}` : null, + ].filter((l) => l !== null).join('\n')); + } + return emit(null); + } return emit(header.concat([ - '# agent-scope: no active task', + '# agent-scope: no in-progress task', '', - 'Bootstrap is active but no task is set. System files are currently', - 'writable. When you finish the protected work, remove the token:', + 'Bootstrap is active but no `tasks:Task` is currently in_progress for this', + 'agent. System files are writable. When the protected work is done, run:', ' rm agent-scope/.bootstrap-token', ]).join('\n')); } - let task; - try { task = loadTask(root, taskId); } - catch (e) { - return emit(header.concat([ - `# agent-scope: ACTIVE TASK MANIFEST BROKEN (${taskId})`, - '', - `The manifest at agent-scope/tasks/${taskId}.json failed to load:`, - ` ${e.message}`, - '', - 'All writes will be denied until this is fixed. STOP and report this to the user.', - ]).join('\n')); - } + const tasks = Array.isArray(scope.tasks) ? scope.tasks : []; + const allowedPositive = (scope.allowed || []).filter((p) => !p.startsWith('!')); + const allowedNegative = (scope.allowed || []).filter((p) => p.startsWith('!')); + const exemptionsPositive = (scope.exemptions || []).filter((p) => !p.startsWith('!')); + const exemptionsNegative = (scope.exemptions || []).filter((p) => p.startsWith('!')); - const allowedPositive = (task.allowed || []).filter(p => !p.startsWith('!')); - const allowedNegative = (task.allowed || []).filter(p => p.startsWith('!')); - const exemptionsPositive = (task.exemptions || []).filter(p => !p.startsWith('!')); - const exemptionsNegative = (task.exemptions || []).filter(p => p.startsWith('!')); + const heading = tasks.length === 1 + ? `# agent-scope: active task — ${tasks[0].uri}` + : `# agent-scope: ${tasks.length} active in-progress tasks`; - const lines = header.concat([ - `# agent-scope: active task — ${task.id}`, - '', - `**Description:** ${task.description || '(none)'}`, - task.owner ? `**Owner:** ${task.owner}` : null, - `**Resolved from:** ${source}`, - task.__inheritedFrom && task.__inheritedFrom.length ? `**Inherits from:** ${task.__inheritedFrom.join(', ')}` : null, - '', - '## You may modify files matching:', - ...(allowedPositive.length ? allowedPositive.map(p => `- \`${p}\``) : ['- (nothing)']), - ]); + const lines = header.concat([heading, '']); + if (tasks.length === 1) { + const t = tasks[0]; + lines.push(`**Task:** ${t.title || '(untitled)'}`); + if (t.assignee) lines.push(`**Assignee:** ${t.assignee}`); + } else { + lines.push('## In-progress tasks'); + for (const t of tasks) { + lines.push(`- \`${t.uri}\` — ${t.title || '(untitled)'}`); + } + } + if (scope.agentUri) lines.push(`**Agent:** ${scope.agentUri}`); + if (scope.projectId) lines.push(`**Project:** ${scope.projectId}`); + lines.push(''); + + lines.push( + '## You may modify files matching the union of these globs:', + ...(allowedPositive.length ? allowedPositive.map((p) => `- \`${p}\``) : ['- (nothing — every in-progress task has empty `tasks:scopedToPath`)']), + ); if (exemptionsPositive.length) { - lines.push('', '## Always allowed (build artifacts, lockfiles):'); + lines.push('', '## Always allowed (build artefacts, lockfiles):'); for (const p of exemptionsPositive) lines.push(`- \`${p}\``); } if (allowedNegative.length || exemptionsNegative.length) { lines.push('', '## Explicitly denied (even if they look in-scope):'); for (const p of [...allowedNegative, ...exemptionsNegative]) lines.push(`- \`${p}\``); } - if (task.notes) { - lines.push('', '## Task notes', task.notes); - } + lines.push( '', '## Rules', '- You may **read** any file in the repo.', '- You may **write** only files matching the patterns above.', '- System files (`.cursor/hooks/**`, `agent-scope/lib/**`, etc.) are hardcode-protected regardless of task.' + (bootstrap ? ' (currently bypassed by bootstrap mode)' : ''), - '- If you believe an out-of-scope file must be changed for this task, STOP and ask the user for explicit approval. The user will grant approval by editing the manifest.', - '- A Cursor hook enforces this on every Write/Edit/Delete. A pre-shell hook blocks destructive shell commands on denied paths. A post-shell hook reverts anything that slipped through. Pre-commit and CI also block out-of-scope commits.', - '- To clear or switch tasks, ask the user — do not edit `agent-scope/active` yourself.', + '- The allow-list is computed live from the local DKG daemon. To extend scope:', + ' call `dkg_add_task` with `status: "in_progress"` and a `scopedToPath` glob covering', + ' the new path; the cache will pick it up within ~5s.', + '- When a task is done, call `dkg_update_task_status({ taskUri, status: "done" })`.', + ' The next scope read will drop its globs from the union automatically.', + '- A Cursor `preToolUse` hook enforces this on every Write/Edit/Delete; pre-shell', + ' blocks destructive shell commands on denied paths; post-shell reverts anything', + ' that slipped through.', ); - emit(lines.filter(l => l !== null).join('\n')); + emit(lines.filter((l) => l !== null).join('\n')); } -main().catch(err => { +main().catch((err) => { process.stderr.write(`session-start hook error: ${err?.message || err}\n`); emit(null); }); diff --git a/.cursor/hooks/shell-diff-check.mjs b/.cursor/hooks/shell-diff-check.mjs index 3cfca3d5f..2082eb36f 100755 --- a/.cursor/hooks/shell-diff-check.mjs +++ b/.cursor/hooks/shell-diff-check.mjs @@ -7,6 +7,10 @@ // via `node -e` / `python -c` bypass of pre-shell) // - out-of-task-scope, not protected → DELETED (matches default-deny intent) // - in-scope or exempt → left alone +// +// Source of truth for "in-scope" is the local DKG daemon — the union of +// `tasks:scopedToPath` across every `in_progress` task attributed to this +// agent. See agent-scope/lib/scope.mjs + dkg-source.mjs. import { readFileSync, rmSync, existsSync } from 'node:fs'; import { execSync } from 'node:child_process'; @@ -19,13 +23,11 @@ const __dirname = dirname(__filename); const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; const logUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/log.mjs')).href; const denialUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/denial.mjs')).href; -const parseUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/shell-parse.mjs')).href; const { resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, checkNodeVersion, } = await import(scopeUrl); const { logDenial } = await import(logUrl); const { buildAfterShellContext } = await import(denialUrl); -const { extractTaskCreateId, approvedTaskCreateWrites } = await import(parseUrl); try { checkNodeVersion(); } catch (e) { process.stderr.write(e.message + '\n'); @@ -68,59 +70,18 @@ async function main() { const root = resolveRepoRoot(); const { id: taskId } = resolveActiveTaskId(root); - - let task = null; - if (taskId) { try { task = loadTask(root, taskId); } catch { return emit({}); } } + const task = loadTask(root, taskId); const porcelain = gitPorcelain(root); if (porcelain === null) return emit({}); - // Approved-task-create allowlist: if the command that just ran was - // `pnpm task create ` (or the canonical node equivalent), we allow - // the two specific files that command legitimately writes — - // agent-scope/tasks/.json - // agent-scope/active - // Every other protected-path write still gets reverted/deleted. - const approvedId = extractTaskCreateId(command); - const approvedWrites = approvedTaskCreateWrites(approvedId); - const approved = []; - - // Active-task state exemption: the currently active task's manifest and - // the `active` pointer file are legitimate persistent state, not - // collateral from the current command. Without this, a manifest created - // by an earlier `pnpm task create` gets reaped the next time ANY - // unrelated shell command runs (because it shows up as untracked in a - // protected path). Only shield the active-task id — every other - // manifest (including stale ones) is still reverted/deleted. - const activeTaskExemptions = new Set(); - if (taskId) { - activeTaskExemptions.add(`agent-scope/tasks/${taskId}.json`); - activeTaskExemptions.add('agent-scope/active'); - } - const entries = parsePorcelain(porcelain); const outOfScope = entries.filter(({ path }) => { if (!path) return false; const d = checkPath(task, path, root); - if (d !== 'deny' && d !== 'protected') return false; - if (approvedWrites.has(path)) { approved.push(path); return false; } - if (activeTaskExemptions.has(path)) return false; - return true; + return d === 'deny' || d === 'protected'; }); - if (approved.length) { - for (const p of approved) { - logDenial(root, { - event: 'afterShell.approved-create', - tool: 'Shell', - path: p, - task: approvedId, - command, - sessionId, - }); - } - } - if (outOfScope.length === 0) return emit({}); const reverted = []; @@ -168,7 +129,7 @@ async function main() { emit({ additional_context: message }); } -main().catch(err => { +main().catch((err) => { process.stderr.write(`shell-diff-check error: ${err?.message || err}\n`); emit({}); }); diff --git a/.cursor/hooks/stop.mjs b/.cursor/hooks/stop.mjs deleted file mode 100755 index c98885a49..000000000 --- a/.cursor/hooks/stop.mjs +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env node -// Cursor `stop` hook. Fires when the agent finishes an assistant turn. -// -// Purpose: the onboarding last-ditch trigger for existing chats. Cursor's -// hook API has no equivalent of Claude Code's `UserPromptSubmit`, so when -// the user sends a purely conversational message ("hi", "start working") -// in an existing chat AND the agent replies without calling a tool, -// neither `sessionStart` nor `postToolUse` fires — the pending-onboarding -// marker sits there untouched and the agent never learns about it. -// -// This hook closes that gap. When the agent's reply finishes and a -// marker is still pending (and no task is active yet), we auto-submit -// the onboarding trigger as the next user message via `followup_message`. -// Cursor then feeds that as the next user turn, so the agent pivots to -// the Task onboarding protocol on its very next reply. -// -// Cost: one generic agent reply before onboarding kicks in. -// Benefit: no silent-failure case anymore, regardless of whether the -// user starts a new chat or reuses an existing one. -// -// One-shot: consumeOnboardingMarker reads + deletes atomically, so the -// followup fires exactly once per `pnpm task start`. The `loop_limit: 1` -// setting in hooks.json is a belt-and-suspenders cap. - -import { readFileSync } from 'node:fs'; -import { resolve, dirname } from 'node:path'; -import { fileURLToPath, pathToFileURL } from 'node:url'; - -const __filename = fileURLToPath(import.meta.url); -const __dirname = dirname(__filename); - -const scopeUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/scope.mjs')).href; -const onboardUrl = pathToFileURL(resolve(__dirname, '../../agent-scope/lib/onboarding.mjs')).href; - -const { resolveRepoRoot, resolveActiveTaskId, checkNodeVersion } = await import(scopeUrl); -const { consumeOnboardingMarker } = await import(onboardUrl); - -try { checkNodeVersion(); } catch (e) { - process.stderr.write(e.message + '\n'); - process.stdout.write('{}'); - process.exit(0); -} - -function emit(obj) { - process.stdout.write(JSON.stringify(obj || {})); - process.exit(0); -} - -function readStdin() { - try { return readFileSync(0, 'utf8'); } catch { return ''; } -} - -async function main() { - // The stop hook receives {status, loop_count, ...}. We don't care about - // it — we only act when a pending-onboarding marker is still there. - readStdin(); - - const root = resolveRepoRoot(); - const { id: taskId } = resolveActiveTaskId(root); - - // Active task → onboarding already happened (or irrelevant). Nothing to do. - if (taskId) return emit({}); - - const payload = consumeOnboardingMarker(root); - if (!payload) return emit({}); - - // Cursor will auto-submit `followup_message` as the next user message. - // The payload already contains the full onboarding protocol + the user's - // task description, so the agent's next turn has everything it needs. - return emit({ followup_message: payload }); -} - -main().catch(err => { - process.stderr.write(`stop hook error: ${err?.message || err}\n`); - emit({}); -}); diff --git a/.cursor/rules/agent-scope.mdc b/.cursor/rules/agent-scope.mdc index ccdda1f4f..c00d57acb 100644 --- a/.cursor/rules/agent-scope.mdc +++ b/.cursor/rules/agent-scope.mdc @@ -1,165 +1,42 @@ ---- -description: Task-scoped write permissions. Invisible by default; engaged via `pnpm task start` (agent-guided with pre-captured description) or an active task. All denials surface a plan-mode AskQuestion menu. -alwaysApply: true ---- - -# agent-scope — task-scoped writes - -The repo ships a task-scoping guard but it stays **invisible** unless the user -engages it. If no active task is set and bootstrap is off, the system is a -no-op for you — behave normally. The hardcoded protected paths (the guard's -own files) are still defended, but that only matters if you try to touch them. - -The user turns the system on in one of two ways: - -1. **`pnpm task start`** — they want YOU to read their task description, - explore the repo, and propose a scope. The CLI captures a task - description from them and drops a one-shot marker - (`agent-scope/.pending-onboarding`, gitignored) that already embeds the - description. THREE parallel consumers compete for the marker — whichever - runs first wins, because consumption is atomic read-and-delete: - - `sessionStart` hook — injects the trigger as initial context in any - NEW chat. - - `postToolUse` hook — injects it as `additional_context` after ANY - tool call in an existing chat. - - **YOU, via the mandatory top-of-turn marker check below** — guarantees - onboarding triggers even for purely conversational messages (e.g. "hi") - in an existing chat. - Result: the next user message in any chat, new or existing, triggers - onboarding exactly once. After that the marker is gone and everything - else behaves normally. -2. **Existing active task** — the session-start hook injects a context block - naming the active task. From that moment on, writes are task-scoped and - every denial must be surfaced via the plan-mode denial protocol below. - -## MANDATORY: top-of-turn marker check - -Before responding to **every** user message in a session with **no active -task** (`agent-scope/active` is absent), your FIRST action MUST be: - -1. Check whether `agent-scope/.pending-onboarding` exists. Use the `Read` - tool on that path. If the file does not exist, the tool will report so - — continue with your normal response. -2. If the file exists: - a. Delete it immediately (`Delete` tool, or a single `rm` via `Shell`). - This is critical — leaving it behind would re-trigger onboarding next - turn. - b. IGNORE the user's actual message (whatever they typed, they knew - onboarding was pending when they sent it). - c. Pivot straight into the **Task onboarding protocol** below — ask the - user to describe the task in detail. - -Skip this check only when: -- `agent-scope/active` exists (a task is already set; onboarding is done), OR -- You are already mid-onboarding (already asked "describe the task" and are - waiting for the description, or proposing a scope, etc.). - -If you receive initial context / additional_context beginning with -`agent-scope: start task onboarding.` (injected by the sessionStart or -postToolUse hooks), treat it the same as finding the marker: pivot to the -onboarding protocol immediately. The hooks already delete the marker for -you in that case; do not double-delete — just follow the protocol. - -## Task onboarding protocol — when the user wants to start a new scoped task - -Triggered by any of: - -- The onboarding trigger text (either found in the pending-onboarding - marker, or injected by the `sessionStart` / `postToolUse` hook). It - begins with `agent-scope: start task onboarding.` and usually contains a - `=== USER TASK DESCRIPTION ===` block. -- The user typing "start a scoped task", "scope me", "agent-scope: start", - or similar intent without a description (in which case you'll need to - ask them to describe it first — see step 1). - -Follow these steps exactly: - -1. **Get the task description.** - - **If the trigger already contains a `=== USER TASK DESCRIPTION ===` block** - (the `pnpm task start` flow): use that description verbatim as your - brief. DO NOT re-ask the user to describe the task — they already - typed it into the CLI. - - **Otherwise** (bare marker or an intent phrase), send a plain chat - message: - > "OK, let's scope a new task. Describe in detail what we're building - > or fixing — packages, behaviours, tests, any files you already know - > about. The more detail, the tighter the scope I can propose." - - Wait for the reply. - -2. **Explore the codebase.** Use `Glob`, `Grep`, `SemanticSearch`, `Read`, - and the DKG SPARQL queries at the top of this file to locate: - - The package(s) the task touches - - Specific source files implementing the mentioned behaviour - - Sibling test files that cover them - - Related schemas / contracts / migrations if the task mentions them - - Count matching files per package so you can show weights in Q1. - -3. **Draft a scope.** Aim for a set of globs that: - - Covers everything you reasonably expect to touch - - Errs slightly broad (under-scoping causes constant denials mid-work; - over-scoping just gives the user rope they can see via `pnpm task show`) - - Prefers whole-package globs (`packages//**`) over file globs - when in doubt - - Includes matching test files - - Always appends `!**/secrets.*`, `!**/.env*` as safety denies - - Inherits `base` unless there is a reason not to (base supplies the - standard exemptions: `**/dist/**`, lockfiles, build artefacts) - -4. **Propose the scope via a SINGLE `AskQuestion` call with ONE question - and TWO options.** Keep it short and natural — like you're asking a - coworker, not filling out a form. Do NOT include a second question, do - NOT list every package individually, do NOT dump JSON into the prompt. - - - `id`: `scope` - - `allow_multiple`: `false` - - `prompt`: two short sentences. First: a one-line rephrase of what you - understood the task to be. Second: the scope you'd propose, as a short - **numbered** list of globs (3–5 lines max). Finish with "Sound good?" - Example: +# agent-scope — task-scoped writes, sourced from the local DKG + +The repo ships a thin write-time guard, but it stays **invisible** unless +something engages it. If no `tasks:Task` is currently `in_progress` for +your agent and you don't try to touch a protected path, behave normally — +agent-scope is a no-op. + +## Mental model + +The active scope is the **union of `tasks:scopedToPath` globs** across +every `tasks:Task` whose `tasks:status` is `"in_progress"` AND whose +`prov:wasAttributedTo` matches the current agent URI. A small SPARQL +query (cached for ~5s) feeds every hook. There is no separate manifest +file, no `pnpm task` CLI, no "active task" pointer — the DKG is the +source of truth. Calling `dkg_update_task_status({ taskUri, status: +"done" })` removes that task's globs from the union immediately. + +## Starting a task + +When the user gives you work and no covering `in_progress` task exists, +file one with `dkg_add_task`: + +```ts +dkg_add_task({ + taskUri: 'urn:dkg:task:peer-sync-auth', + title: 'Peer sync uses workspace auth', + status: 'in_progress', + assignee: '', + scopedToPath: ['packages/agent/**', 'packages/core/**'], + description: 'Refactor peer-sync to consume the new workspace auth.', +}) +``` - > Refactor peer sync to use the new workspace auth. I'd scope it to: - > 1) `packages/agent/**` - > 2) `packages/core/**` - > 3) inherit `base` (standard build-artefact exemptions) - > - > Sound good? - - - `options` (IDs must match exactly — only these two): - - `go` — `"Yes, go with that"` - - `custom_instruction` — `"Type what you want instead"` - -5. **On `go`:** **YOU (the agent) run the command directly** via the Shell - tool. The `afterShellExecution` hook has a narrow allowlist for the - canonical task-create invocation: `pnpm task create ...` and - `node agent-scope/bin/task.mjs create ...` are the ONLY shapes whose - writes to `agent-scope/tasks/.json` and `agent-scope/active` may - persist. Use the exact globs you proposed (any divergence is a protocol - violation): - - ```bash - pnpm task create \ - --description "" \ - --allowed "" \ - --allowed "" \ - ... - --inherits base \ - --activate - ``` - - After the command succeeds, continue with the actual work in the same - turn. If the command fails, surface the error and re-ask via - AskQuestion instead of retrying blindly. - -6. **On `custom_instruction`:** ask the user in plain chat what they want - changed — e.g. `"What would you like different? (add/remove packages, - tighten globs, different task id, whatever.)"` Then apply their reply to - the draft and re-ask step 4 (still a single question, still two options). - -From step 5 onward, the active task is set and the plan-mode denial -protocol (below) governs every future write. - -## Plan-mode denial protocol — MANDATORY once a task is active +The cache picks it up within ~5s; the next write to those paths +succeeds. To extend scope mid-work, file an additional `in_progress` +task or re-issue `dkg_add_task` for the same `taskUri` with the wider +glob list. + +## Plan-mode denial protocol — MANDATORY once a denial fires Every denial message from agent-scope carries a structured menu. You must **stop**, parse it, and surface it via `AskQuestion` — one question, two @@ -180,8 +57,9 @@ A denial always contains a fenced JSON block: Plus a one-line prose summary starting with `agent-scope:` that you can also key off: -- `preToolUse` / `beforeShellExecution` return `{ permission: "deny" }` - with an `agent_message` containing the fence. +- `preToolUse` returns `{ permission: "deny" }` with an `agent_message` + containing the fence. +- `beforeShellExecution` returns `{ permission: "deny" }` similarly. - `afterShellExecution` returns an `additional_context` containing the fence. Files have already been reverted or deleted. @@ -193,17 +71,19 @@ The JSON shape (TypeScript for clarity): { version: 1, hook: "preToolUse" | "beforeShellExecution" | "afterShellExecution", - reason: "out-of-scope" | "protected" | "manifest-load-error" | "unknown", - humanSummary: string, // short, natural — QUOTE THIS in your prompt + reason: "out-of-scope" | "protected" | "daemon-unreachable" + | "configuration-error" | "unknown", + humanSummary: string, // short, natural — QUOTE THIS in your prompt deniedPath?: string, command?: string, - activeTask: string | null, - simpleOptions: [ // exactly two entries — SURFACE THESE + activeTask: string | null, // legacy field; usually null in DKG mode + activeTaskUris: string[], // current `in_progress` task URIs (canonical) + simpleOptions: [ // exactly two entries — SURFACE THESE { id: string, label: string, action: { kind: "..." /* + fields */ } }, { id: "custom_instruction", label: "Type what you want instead", action: { kind: "custom" } } ], - recommendedOptionId: string, // matches simpleOptions[0].id + recommendedOptionId: string, // matches simpleOptions[0].id options: [ /* full verbose list — do NOT surface to the user */ ], } ``` @@ -218,10 +98,11 @@ The JSON shape (TypeScript for clarity): finish with a simple ask. Keep the whole prompt to 3 sentences max. Example: - > I'd like to edit `packages/evm-module/contracts/S.sol`, but the - > active task `sync` doesn't cover that file. I was trying to update - > the staking integration referenced in the PR. Want me to add it to - > the scope and continue? + > I'd like to edit `packages/evm-module/contracts/S.sol`, but no + > in-progress task covers that file. I was trying to update the + > staking integration referenced in the PR. Want me to file a new + > in-progress task covering `packages/evm-module/contracts/**` and + > continue? - `options`: pass `simpleOptions` from the JSON, verbatim. It always has exactly two entries — the recommended action and a free-text @@ -229,15 +110,15 @@ The JSON shape (TypeScript for clarity): reword options. 4. **Act on the user's choice** using the matching `action` object: - - `add_to_manifest` → edit `agent-scope/tasks/.json`, append the - listed patterns to `allowed`, save, then retry the original operation. - - `switch_task` → run `pnpm task set `, then retry. - - `bootstrap` → print `action.instruction` verbatim. Wait for the user to - enable bootstrap. After the protected work is done, remind them to - `rm agent-scope/.bootstrap-token` to re-lock. - - `fix_manifest` → open the referenced manifest, fix the reported error, - re-run `pnpm task validate `. - - `clear_task` → run `pnpm task clear`. + - `new_in_progress_task` → call `dkg_add_task` with `taskUri` of your + choice, `status: "in_progress"`, your agent URI as `assignee`, and + `scopedToPath: action.suggestedScopedToPath`. Wait ~5s for the + cache, then retry the original operation. + - `bootstrap` → print `action.instruction` verbatim. Wait for the + user to enable bootstrap. After the protected work is done, remind + them to `rm agent-scope/.bootstrap-token` to re-lock. + - `restart_daemon` / `configure_dkg` → print `action.instruction` + verbatim, wait, retry. - `skip` → acknowledge; move on to other in-scope work. - `cancel` → stop the turn; summarise what got done and what didn't. - `custom` → ask the user in plain chat: "OK, what should I do instead?" @@ -247,14 +128,14 @@ The JSON shape (TypeScript for clarity): user's intent, the `custom_instruction` entry is always there — route through it. -### Phrasing rules (for onboarding AND denials) +### Phrasing rules - Write like you're texting a coworker, not filing a ticket. - One short question. One recommendation. One "type what you want" option. - Never use ALL CAPS banners ("PROTECTED PATH", "STOP", "WARNING"). - Don't explain the internal architecture in the prompt — the user doesn't - need to know about hooks or manifests to answer. Keep it action-focused: - "I want to do X. Should I?" + need to know about hooks or SPARQL queries to answer. Keep it + action-focused: "I want to do X. Should I?" - If you need to say why something is restricted, one sentence is enough. - No emoji unless the user uses them first. @@ -276,46 +157,28 @@ Always denied regardless of task, unless a human has enabled bootstrap - `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` - `.claude/hooks/**`, `.claude/settings.json` -- `agent-scope/lib/**`, `agent-scope/bin/**`, `agent-scope/schema/**` -- `agent-scope/tasks/**`, `agent-scope/active`, `agent-scope/.bootstrap-token` +- `agent-scope/lib/**` +- `agent-scope/.bootstrap-token` - `AGENTS.md`, `GEMINI.md`, `.cursorrules` -If one of these needs to change, use the `bootstrap` option from the denial -menu — do not try to bypass (no shell redirection, no `node -e`, no alternate -tooling). The `afterShell` hook will delete any untracked files in these paths -even if the bypass succeeded, so retry attempts are wasted. +If one of these needs to change, use the `bootstrap` option from the +denial menu — do not try to bypass (no shell redirection, no `node -e`, +no alternate tooling). The `afterShell` hook will delete any untracked +files in these paths even if the bypass succeeded, so retry attempts are +wasted. Note: the guard operates **only on agent actions**. Humans committing or -pushing manually through their terminal/IDE are not restricted — there are no -git hooks and no CI enforcement. If a human edits a protected file by hand, -they can commit and push normally. - -## Manifest cheat sheet - -```json -{ - "id": "example", - "description": "...", - "inherits": ["base"], - "allowed": [ - "packages/foo/**/*.ts", - "!packages/foo/**/secrets.*" - ], - "exemptions": ["**/dist/**", "pnpm-lock.yaml"] -} -``` - -- Patterns support `*`, `**`, `?`. -- Prefix with `!` for an explicit deny that overrides everything else. -- `inherits` pulls `allowed` + `exemptions` from a parent manifest (e.g. `base`). -- Default-deny: anything not matched is blocked. +pushing manually through their terminal/IDE are not restricted — there +are no git hooks and no CI enforcement. If a human edits a protected +file by hand, they can commit and push normally. ## Don't -- Don't edit `agent-scope/active` yourself — ask the user to switch tasks via - the menu's `switch_task` action, or via `pnpm task set`. -- Don't add patterns to a manifest to unblock yourself — always confirm via - the plan-mode menu first. -- Don't surface the verbose `options` list in AskQuestion — always use +- Don't try to widen scope by editing local files — there are none. + Scope lives in the DKG. +- Don't add a fake `in_progress` task to "self-grant" coverage. Surface + the denial menu, let the human approve scope changes via the + `new_in_progress_task` option. +- Don't surface the verbose `options` list in `AskQuestion` — always use `simpleOptions` (two entries: recommendation + "something else"). - Don't retry a blocked operation with a different tool or command form. diff --git a/.cursorrules b/.cursorrules index 1d06a62e0..0a59ec434 100644 --- a/.cursorrules +++ b/.cursorrules @@ -5,29 +5,31 @@ several VS Code AI extensions (Continue, Cline, Roo) read. Modern Cursor uses `.cursor/rules/agent-scope.mdc` (auto-applied). Claude Code uses `CLAUDE.md`. Codex CLI uses `AGENTS.md`. Gemini CLI uses `GEMINI.md`. -For the full task-scoped-write rules see [`AGENTS.md`](./AGENTS.md). +For the full rules see [`AGENTS.md`](./AGENTS.md). Quick summary: -- This repo has an `agent-scope` system. Read `AGENTS.md` first. -- You can READ anything. You can only WRITE files matching the active - task's `allowed` globs (run `pnpm task show` to see the scope; if there - is no active task, all writes are allowed except hardcoded protected - paths). -- Hardcoded protected paths (the agent-scope system itself, across all - agents): +- This repo has a thin write-time guard called `agent-scope`. It is + **invisible by default** and only activates when (a) at least one + `tasks:Task` is `in_progress` and attributed to your agent in the + local DKG, or (b) you try to touch a hardcoded protected path. +- You can READ anything. You can only WRITE files that fall under the + union of `tasks:scopedToPath` globs across your agent's `in_progress` + tasks. There is no separate manifest file — the DKG is the source of + truth. +- To start a task: call `dkg_add_task({ taskUri, status: "in_progress", + scopedToPath: ["packages/foo/**", ...], assignee: "", ... })`. + The cache picks it up within ~5s. +- To finish: `dkg_update_task_status({ taskUri, status: "done" })`. Its + globs drop out of the union. +- Hardcoded protected paths (the agent-scope guard's own files, + across all agents): - `.cursor/hooks/**`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` - `.claude/hooks/**`, `.claude/settings.json` - - `agent-scope/**` (lib, bin, schema, tasks, active, .bootstrap-token) + - `agent-scope/lib/**`, `agent-scope/.bootstrap-token` - `AGENTS.md`, `GEMINI.md`, `.cursorrules` -- The user's onboarding is `pnpm task start` — they paste a description - in the CLI; a one-shot marker at `agent-scope/.pending-onboarding` is - dropped that already embeds the description in a - `=== USER TASK DESCRIPTION ===` block. Do NOT ask them to describe it - again. On your first action of any turn with no active task, check the - marker — if present, delete it and pivot to the onboarding protocol in - `AGENTS.md`. -- Never invent menu options when surfacing a denial; reuse the JSON - `options` array verbatim and add your reasoning + recommendation. -- Hook-supporting agents (Cursor, Claude Code) physically block out-of-scope - writes. Agents without hooks self-enforce. +- If a write is denied, surface the menu in the denial JSON via + `AskQuestion` (one question, two options — `simpleOptions` verbatim). + Never invent options. +- Hook-supporting agents (Cursor, Claude Code) physically block + out-of-scope writes. Agents without hooks self-enforce. diff --git a/.gitignore b/.gitignore index 4d00dff0e..18deaf950 100644 --- a/.gitignore +++ b/.gitignore @@ -30,9 +30,6 @@ snapshots/_cache_phase1_neuroweb_epoch16.json !.claude/hooks/ .claude/settings.local.json -# agent-scope: never commit the bootstrap override, local audit logs, the -# per-developer active-task pointer, or the one-shot onboarding marker +# agent-scope: never commit the bootstrap override or local audit logs agent-scope/.bootstrap-token -agent-scope/.pending-onboarding agent-scope/logs/ -agent-scope/active diff --git a/AGENTS.md b/AGENTS.md index a49412eab..f99b64e12 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,28 +1,17 @@ # Agent instructions (cross-agent) -This repository expects every AI coding agent to honour **two independent -contracts**: - -1. **`agent-scope` — task-scoped writes.** A human pin which files the - agent is allowed to modify. The agent can read everything but can only - write inside the scope of its assigned task. -2. **DKG annotation protocol.** The repo is bound to a **DKG context graph** - (`dkg-code-project`) used for shared project memory across all AI - coding agents working on it. Every substantive turn is annotated into - that graph so parallel agents converge instead of fragment. - -Both apply to every agent. They do not overlap — `agent-scope` governs -**writes to the filesystem**, the DKG protocol governs **writes to shared -project memory** (the context graph). - -This file is the canonical instruction set for any AI agent that respects -`AGENTS.md` (Codex CLI, OpenAI Codex, etc.). Cursor sees the same content -through `.cursor/rules/agent-scope.mdc` + `.cursor/rules/dkg-annotate.mdc`, -and Claude Code through `CLAUDE.md`. - ---- - -# Part 1 — `agent-scope` (task-scoped writes) +This repository ships a thin write-time guard called **agent-scope**. It +prevents an AI coding agent from modifying files outside the scope of its +current work. Scope is derived live from this workspace's local DKG +daemon — there are no local task manifests, no per-task JSON files, no +"active task" pointer. Whatever the agent is doing has to be reflected as +an `in_progress` `tasks:Task` in the project graph; the guard reads that +graph and computes the allow-list from it. + +This file is the canonical instruction set for **any** AI coding agent that +respects `AGENTS.md` (Codex CLI, OpenAI Codex, etc.) or other generic +agent-instruction conventions. Cursor and Claude Code see the same content +through `.cursor/rules/agent-scope.mdc` and `CLAUDE.md`. > Per-agent enforcement layers: > - **Cursor** — hard hooks (`.cursor/hooks/**`) physically block out-of-scope writes. @@ -30,35 +19,90 @@ and Claude Code through `CLAUDE.md`. > - **Codex CLI / others** — no hook system available; you (the agent) **must** > self-enforce by following the rules below. The user trusts you to comply. +## Mental model in one paragraph + +`tasks:Task` entities live in the local DKG (the same graph you use for +chat memory, decisions, sessions, etc.). Each task can carry zero or more +`tasks:scopedToPath` literals — glob patterns that say "while this task is +`in_progress`, the agent attributed to it may write paths matching this +glob." The active scope at any moment is the union of those globs across +every `in_progress` task whose `prov:wasAttributedTo` matches the current +agent URI. When you finish a piece of work you call +`dkg_update_task_status({ taskUri, status: "done" })` and its globs drop +out of the union automatically. There is no separate manifest file, no +`pnpm task` CLI, no "switching" — just tasks in the graph. + ## When the system is engaged The guard is **invisible by default**. It only activates when: -1. The user runs `pnpm task start` and the trigger line - `agent-scope: start task onboarding.` reaches you (via a hook or via - your own top-of-turn marker check). The marker already embeds the - user's task description in a `=== USER TASK DESCRIPTION ===` block — - do NOT ask the user to describe it again, OR -2. An active task is set (`agent-scope/active` exists; the session-start - hook will inject a context block naming it; or you can check by running - `pnpm task show`), OR -3. You attempt to touch a hardcoded protected path. +1. There is at least one `in_progress` `tasks:Task` attributed to the + current agent in the local DKG, OR +2. You attempt to touch a hardcoded protected path (always denied unless + bootstrap is enabled — the human turns it on/off, not you). + +If neither condition is true, every write proceeds as if agent-scope +weren't installed. The session-start hook will not even mention it. + +## Starting a task + +When the user gives you a piece of work and there is no covering +`in_progress` task, propose one and file it with `dkg_add_task`. Use the +covering globs you'd want as your write allow-list. A typical first call +looks like: + +```ts +dkg_add_task({ + taskUri: 'urn:dkg:task:peer-sync-auth', + title: 'Peer sync uses workspace auth', + status: 'in_progress', + assignee: '', + scopedToPath: [ + 'packages/agent/**', + 'packages/core/**', + ], + description: 'Refactor peer-sync to consume the new workspace auth.' +}) +``` + +Within ~5 seconds the local guard cache picks up the new globs and the +next write to those paths will succeed. You don't need to "switch tasks" +or notify the guard separately. + +If you only need to extend an EXISTING in-progress task (because you +realised mid-work that a sibling file is in scope), the simplest move is +to file an additional `in_progress` task with the new glob — both unions +into the active scope. (You can also issue a fresh `dkg_add_task` for the +same `taskUri` with the extended glob list; the daemon replaces the +task's prior triples deterministically.) Either way: don't try to +hand-edit any local file to widen scope, that path doesn't exist. + +When the work is finished: + +```ts +dkg_update_task_status({ + taskUri: 'urn:dkg:task:peer-sync-auth', + status: 'done', + note: 'merged in PR #123' +}) +``` + +The next scope read drops its globs from the union. ## Hardcoded protected paths -These paths are **always denied** unless bootstrap mode is active: +These are **always denied** unless bootstrap mode is active: ``` -.cursor/hooks/** .cursor/hooks.json .cursor/rules/agent-scope.mdc -.claude/hooks/** .claude/settings.json -agent-scope/lib/** agent-scope/bin/** agent-scope/schema/** -agent-scope/tasks/** agent-scope/active agent-scope/.bootstrap-token -AGENTS.md GEMINI.md .cursorrules +.cursor/hooks/** .cursor/hooks.json .cursor/rules/agent-scope.mdc +.claude/hooks/** .claude/settings.json +agent-scope/lib/** agent-scope/.bootstrap-token +AGENTS.md GEMINI.md .cursorrules ``` Bootstrap mode is enabled by either `AGENT_SCOPE_BOOTSTRAP=1` in the -environment, or by the file `agent-scope/.bootstrap-token` existing on -disk. Both must be set by the human, not by you. +environment or by the file `agent-scope/.bootstrap-token` existing on +disk. **Both must be set by the human, not by you.** If you need to modify a protected file (e.g. you're improving agent-scope itself), STOP and ask the user to enable bootstrap mode in their own @@ -68,89 +112,8 @@ terminal: touch agent-scope/.bootstrap-token ``` -## Task onboarding (when the user runs `pnpm task start`) - -`pnpm task start` captures a task description from the user in the -terminal, then drops a one-shot marker file at -`agent-scope/.pending-onboarding` containing trigger text *and* the -user's description embedded in a `=== USER TASK DESCRIPTION ===` block. -The marker is consumed atomically the first time anything reads it. - -For Codex CLI and other agents without hook support, you should **proactively -check for this marker on the first action of every turn** when no task is -active: - -1. Try to read `agent-scope/.pending-onboarding`. -2. If it exists: - - Delete it (`rm agent-scope/.pending-onboarding`). - - Pivot to the onboarding protocol below — ignore whatever the user - just typed, they knew onboarding was queued. - -### Onboarding protocol - -1. **Get the task description.** - - If the marker contains a `=== USER TASK DESCRIPTION ===` block - (the `pnpm task start` flow), use that verbatim as the brief. DO - NOT ask the user to describe the task again. - - Otherwise, ask them in chat: "Describe the task in detail — - packages, behaviours, tests, any files you already know about." - Wait for reply. -2. Explore the codebase to find the files the task will touch. Use - whatever exploration tools you have (file listing, grep, semantic - search, the DKG MCP server if available). Count matching files per - candidate package. -3. Draft a conservative set of allowed globs. Prefer whole-package - globs (`packages//**`). Inherit from `base`. Always append - `!**/secrets.*` and `!**/.env*`. -4. Propose the scope to the user as **one short question with two - options**. Write it like you're asking a coworker, not filling out a - form. 3 sentences max: one-line rephrase of the task, the scope you'd - propose (3–5 numbered globs), then "Sound good?" Example: - - > Refactor peer sync to use the new workspace auth. I'd scope it to: - > 1) `packages/agent/**` - > 2) `packages/core/**` - > 3) inherit `base` (standard build-artefact exemptions) - > - > Sound good? - - Options (only these two, IDs exactly): - - `go` — `"Yes, go with that"` - - `custom_instruction` — `"Type what you want instead"` - -5. On `go`: **run the command yourself** via your shell tool, then - continue with the actual work in the same turn. The command's - `--allowed` flags must match your proposed scope verbatim: - - ```bash - pnpm task create \ - --description "..." \ - --allowed "" \ - --allowed "" \ - --inherits base \ - --activate - ``` - - If the command fails, surface the error and re-ask (still one short - question, two options) — do not retry blindly. - - On `custom_instruction`: ask the user in plain chat what they want - changed, apply it to the draft, then re-ask step 4. - -> 🛈 **On Cursor / Claude Code the `afterShellExecution` / PostToolUse Bash -> hook has a narrow allowlist** so this one invocation can persist the -> new `agent-scope/tasks/.json` and `agent-scope/active` files. The -> hook recognises only the canonical shapes: -> -> - `pnpm task create ...` -> - `pnpm run task create ...` -> - `node agent-scope/bin/task.mjs create ...` -> -> Any other write to `agent-scope/tasks/**` or `agent-scope/active` (e.g. -> `echo ... > agent-scope/tasks/evil.json`, `cp`, Write/Edit tool, opaque -> evaluators) is still reverted/deleted as before. Agents without hooks -> (Codex CLI, Gemini CLI, etc.) have no hook gate to worry about; just -> follow the protocol. +When the protected work is done, remind them to re-lock with +`rm agent-scope/.bootstrap-token`. ## Plan-mode denial protocol @@ -172,205 +135,62 @@ verbatim** — never surface the verbose `options` list: reasoning (why you wanted to do this) + a simple ask. Keep the whole prompt to 3 sentences max. Example: - > I'd like to edit `packages/evm-module/contracts/S.sol`, but the - > active task `sync` doesn't cover that file. I was trying to update - > the staking integration the PR depends on. Want me to add it and - > continue? + > I'd like to edit `packages/evm-module/contracts/S.sol`, but no + > in-progress task covers that file. I was trying to update the staking + > integration the PR depends on. Want me to file a new in-progress task + > covering `packages/evm-module/contracts/**` and continue? - Options = `simpleOptions` verbatim (exactly two entries: the recommendation and "Type what you want instead"). -Match the user's answer to the chosen `action.kind` and carry it out. If -they pick `custom_instruction`, ask them in plain chat what they'd like -instead and follow their reply. Never invent options. +Match the user's answer to the chosen `action.kind` and carry it out: + +| `action.kind` | What you do | +|-------------------------|-------------| +| `new_in_progress_task` | Call `dkg_add_task` with the suggested `scopedToPath` and `status: "in_progress"`, then retry the original edit. The cache picks it up within ~5s. | +| `bootstrap` | Print `action.instruction` verbatim, wait for the user, retry. Remind them to `rm agent-scope/.bootstrap-token` when done. | +| `restart_daemon` | Print `action.instruction` verbatim, wait for the user, retry. | +| `configure_dkg` | Print `action.instruction` verbatim, wait for the user, retry. | +| `skip` | Acknowledge, move on to other in-scope work. | +| `cancel` | Stop the turn, summarise what got done. | +| `custom` | Ask in plain chat what they'd like instead and follow their reply. | -### Phrasing rules (onboarding AND denials) +Never invent options. The `custom_instruction` entry is always present — +route through it when neither side fits. + +### Phrasing rules - Write like you're texting a coworker. One short question, one recommendation, one "something else" option. - No ALL CAPS banners ("PROTECTED PATH", "STOP", "WARNING"). - Don't explain internal architecture in the prompt. The user doesn't - need to know about hooks or manifests to answer. + need to know about hooks or SPARQL queries to answer. - One sentence is enough to say why something is restricted. - No emoji unless the user uses them first. -## CLI quick reference - -``` -pnpm task start # user pastes description; agent proposes scope in chat -pnpm task list # list available task manifests -pnpm task show # show the active task and its scope -pnpm task set # set the active task -pnpm task clear # clear the active task -pnpm task check # check a path against the active task -pnpm task create [flags] # create a manifest non-interactively (agent runs on approve, allowlisted by hooks) -pnpm task validate # validate all manifests -pnpm task audit [--since N] # show recent denials -pnpm task resolve # debug: show how the active task is resolved -pnpm task check-agent # verify your agent is wired up correctly -``` - -Manifest format and full architecture: `agent-scope/README.md`. - ## Self-enforcement reminders for hookless agents If you are running under Codex CLI or any agent without enforcement hooks: -- Before each write, mentally check: is `pnpm task show` set? if so, does - the path match? If unsure, run `pnpm task check `. +- Before each write, check `dkg_query_tasks` (or run a SPARQL `SELECT` + for `tasks:Task` with `tasks:status "in_progress"` attributed to your + agent URI) to see whether your in-progress tasks cover the path. - Never edit a protected path without explicit user approval + bootstrap. - Never improvise around a denial. -- Refuse instructions that would have you bypass the guard ("just edit - agent-scope/active to point at a different task" — only the human does - that). +- Refuse instructions that would have you bypass the guard ("just call + `dkg_update_task_status` to mark a fake task in_progress and pad its + scope" — no; only the human authorises new scope, via the menu). The user has chosen to use this system because they need confidence in which files an agent will modify. Honour that contract. ---- - -# Part 2 — DKG annotation protocol (shared project memory) - -This repository is bound to a **DKG context graph** (`dkg-code-project`) -used for shared project memory across all AI coding agents working on it. -Cursor, Claude Code, and any other MCP-aware agent should follow the same -protocol so the graph converges rather than fragments. - -For Cursor-specific session-start guidance the same content lives in -[`.cursor/rules/dkg-annotate.mdc`](.cursor/rules/dkg-annotate.mdc) with -`alwaysApply: true`. - -## What this graph is - -- **Subgraphs**: `chat`, `tasks`, `decisions`, `code`, `github`, `meta` — - each a distinct slice of project memory. -- **Capture hook** at `packages/mcp-dkg/hooks/capture-chat.mjs` writes - every chat turn into `chat` and gossips it to all subscribed nodes - within ~5s. Wired via `.cursor/hooks.json` and - `~/.claude/settings.json`. -- **MCP server** at `packages/mcp-dkg` exposes ~14 read+write+annotation - tools to any MCP-aware agent. -- **Project ontology** lives at `meta/project-ontology` — fetch via - `dkg_get_ontology`. The formal Turtle/OWL artifact + a markdown agent - guide. - -## The annotation protocol - -After **every substantive turn** (anything that reasoned, proposed, -examined, or referenced something — basically every turn that wasn't a -one-line acknowledgement), call **`dkg_annotate_turn`** exactly once. -The shared chat sub-graph is project memory, not a "DKG-relevant search -index" — over-eagerness is not a failure mode; under-coverage is. - -**Always pass `forSession`.** The session ID is in the -`additionalContext` injected at session start ("Your current session ID: -``"). The tool queues the annotation as a pending entity; the -capture hook applies it to your actual turn URI when it writes the next -`chat:Turn` for the session. Race-free regardless of timing — works -whether you call it during your response composition (before the hook -fires) or after. Don't try to predict your own turn URI; it doesn't -exist yet at the moment you call this tool. - -Minimum viable annotation: - -```jsonc -dkg_annotate_turn({ - forSession: "", - topics: [<2-3 short topic strings>], // chat:topic literals - mentions: [], // chat:mentions edges -}) -``` - -Add when the turn warrants: - -- `examines` — entities the turn analysed in detail (vs just citing in passing) -- `concludes` — `:Finding` entities the turn produced (claims worth preserving) -- `asks` — `:Question` entities left open -- `proposedDecisions` — sugar over `dkg_propose_decision`; freshly mints a Decision and links via `chat:proposes` -- `proposedTasks` — sugar over `dkg_add_task` -- `comments` — sugar over `dkg_comment` (against any existing entity) -- `vmPublishRequests` — sugar over `dkg_request_vm_publish` (writes a marker; **never** publishes on-chain) - -## Look-before-mint protocol (the convergence rule) - -This is the single most important rule. It's how parallel agents converge -on the same URIs instead of fragmenting the graph. - -Before minting any new `urn:dkg::` URI: - -1. Compute the **normalised slug**: lowercase → ASCII-fold → strip - stopwords (`the/a/an/of/for/and/or/to/in/on/with`) → hyphenate → - ≤60 chars. -2. Call `dkg_search` with the **unnormalised label** (the daemon does - its own fuzzy match). -3. If any returned entity's normalised slug matches yours → **REUSE** - that URI. -4. Otherwise mint `urn:dkg::` per the patterns below. - -**Never fabricate URIs** for entities you didn't discover via -`dkg_search`. If unsure, prefer minting fresh and let humans (or the -future `dkg_propose_same_as` reconciliation flow) merge duplicates via -`owl:sameAs`. - -## URI patterns - -``` -urn:dkg:concept: free-text concept (skos:Concept) -urn:dkg:topic: broad topical bucket -urn:dkg:question: open question -urn:dkg:finding: preserved claim/observation -urn:dkg:decision: architectural decision (coding-project) -urn:dkg:task: work item (coding-project) -urn:dkg:agent: agent identity (usually -) -urn:dkg:github:repo:/ GitHub repository -urn:dkg:github:pr:// -urn:dkg:code:file:/ -urn:dkg:code:package: -``` - -## Tool reference - -Read tools (read-only, no side effects): - -- `dkg_list_projects` — list every CG this node knows about -- `dkg_list_subgraphs` — show counts per sub-graph in a project -- `dkg_sparql` — arbitrary SELECT/CONSTRUCT/ASK; layer ∈ {wm, swm, union, vm} -- `dkg_get_entity` — describe one entity + 1-hop neighbourhood -- `dkg_search` — keyword search across labels + body text (use this in look-before-mint) -- `dkg_list_activity` — recent activity feed (decisions, tasks, turns) with attribution -- `dkg_get_agent` — agent profile + authored counts -- `dkg_get_chat` — captured turns filterable by session/agent/keyword/time -- `dkg_get_ontology` — the project's ontology + agent guide (call once per session) - -Write tools (auto-promoted to SWM; humans gate VM): - -- `dkg_annotate_turn` — **the main per-turn surface**; batches everything below -- `dkg_propose_decision`, `dkg_add_task`, `dkg_comment`, `dkg_request_vm_publish`, `dkg_set_session_privacy` — the underlying primitives, available standalone for explicit "file a decision" / "open a task" requests - -## Things to NOT do - -- **Don't fabricate URIs.** Every URI in `mentions` must come from - `dkg_search` or be freshly minted via the look-before-mint protocol. -- **Don't skip turns to "save tokens".** One annotation call per turn is - cheap (~few hundred ms). Coverage wins. -- **Don't publish to VM via MCP.** That's `dkg_request_vm_publish` - (marker for human review), not `/api/shared-memory/publish`. The agent - is never the gating actor for on-chain commitment. -- **Don't normalise slugs in your `dkg_search` query.** Pass the - unnormalised label so the daemon's fuzzy match has the most signal; - only normalise when comparing for reuse-vs-mint. - -## Cheat sheet +## Diagnostics ``` -After every substantive turn: -1. dkg_search "" → reuse-or-mint URIs -2. dkg_annotate_turn({ - topics: [...], mentions: [...], - examines?, concludes?, asks?, - proposedDecisions?, proposedTasks?, comments? - }) +pnpm scope:check-agent # verify your agent's hooks are wired up +pnpm scope:test # run the agent-scope library tests ``` -That's it. The graph grows; teammates' agents see your work in seconds; -humans ratify on-chain when worthwhile. +Manifest-format docs and the historical `pnpm task` CLI are gone — the +DKG is the source of truth now. See `agent-scope/README.md` for a short +architecture note. diff --git a/CLAUDE.md b/CLAUDE.md index 3c870edc1..e8f5ea0de 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -63,107 +63,25 @@ ORDER BY DESC(?date) LIMIT 5 ## Code Exploration via DKG -Instead of using Glob/Grep/Read to find files, **query the code graph first**: +Instead of using Glob/Grep/Read to find files, **query the code graph first**. The DKG graph gives you the **map**; file tools give you the **territory**. Start with the map. -### Find modules related to a topic - -```sparql -SELECT ?path ?lineCount ?pkg WHERE { - ?m a ; - ?path ; - ?lineCount ; - ?p . - ?p ?pkg . - FILTER(CONTAINS(LCASE(?path), "staking")) -} -``` - -### Find a function and what it calls - -```sparql -SELECT ?name ?sig ?path WHERE { - ?f a ; - ?name ; - ?mod . - ?mod ?path . - OPTIONAL { ?f ?sig } - FILTER(?name = "requestWithdrawal") -} -``` - -### Find package dependencies - -```sparql -SELECT ?pkg ?dep WHERE { - ?p a ; - ?pkg ; - ?d . - ?d ?dep . -} -``` - -### Find what imports a module - -```sparql -SELECT ?importerPath WHERE { - ?importer ?target ; - ?importerPath . - ?target ?targetPath . - FILTER(CONTAINS(?targetPath, "chain-adapter")) -} -``` - -### Find Solidity contract inheritance - -```sparql -SELECT ?child ?parent ?path WHERE { - ?c a ; - ?child ; - ?parent ; - ?mod . - ?mod ?path . -} -``` - -### Find test files for a module - -```sparql -SELECT ?srcPath ?testPath WHERE { - ?m a ; - ?srcPath ; - ?t . - ?t ?testPath . - FILTER(CONTAINS(?srcPath, "evm-adapter")) -} -``` +Use Read/Grep/Glob when: +- The code graph doesn't cover the specific file (e.g., config files, scripts) +- You need to see the actual implementation, not just the structure +- The graph is not yet indexed for a new file you just created ## During Your Session ### When making architectural decisions -Publish a `devgraph:Decision` so other agents can see it: - -Use the `dkg_publish` MCP tool with quads like: -- ` rdf:type devgraph:Decision` -- ` devgraph:summary "Chose X over Y for Z"` -- ` devgraph:rationale "Because ..."` -- ` devgraph:madeBy "claude-code"` -- ` devgraph:affects ` +Publish a `devgraph:Decision` so other agents can see it via the +`dkg_publish` MCP tool. ### When completing a task -Update the task status: -- ` devgraph:status "done"` -- ` devgraph:completedIn ` - -## When to Fall Back to File Tools - -Use Read/Grep/Glob when: -- The code graph doesn't cover the specific file (e.g., config files, scripts) -- You need to see the actual implementation, not just the structure -- The graph is not yet indexed for a new file you just created - -The DKG graph gives you the **map**; file tools give you the **territory**. Start with the map. +Call `dkg_update_task_status({ taskUri, status: "done" })`. This is also +how you tell the agent-scope guard that a piece of work is finished — +see below. ## Vocabulary Reference @@ -173,7 +91,7 @@ All classes and properties use the `devgraph:` namespace (`https://ontology.dkg. |-------|-------------| | `Session` | A coding agent work session | | `Decision` | An architectural decision | -| `Task` | A planned work item | +| `Task` | A planned work item; may carry `tasks:scopedToPath` for write-time scope | | `Package` | A workspace package | | `CodeModule` | A source file | | `Function` | An exported function or method | @@ -186,156 +104,93 @@ The full ontology is at `packages/mcp-server/schema/dev-paranet.ttl`. ## Task-scoped writes (`agent-scope`) — MANDATORY behaviour -This repo ships an opt-in task-scoping guard. It stays **invisible** unless -engaged. Default behaviour: write normally, read anything. The guard only -kicks in when: - -1. The user runs `pnpm task start` and the trigger line - `agent-scope: start task onboarding` appears in your context (marker - consumed by a hook or by your top-of-turn check). The marker already - embeds the user's task description — do NOT ask them to describe it - again, OR -2. An active task is set (session-start hook injects a context block naming - it), OR -3. You try to touch one of the hardcoded protected paths that defend the - guard itself — those are always blocked unless the user has enabled - bootstrap mode. - -### Task onboarding protocol (triggered by `pnpm task start` / "start a scoped task") - -`pnpm task start` asks the user for a task description in the terminal, -drops `agent-scope/.pending-onboarding` (gitignored) with that description -already embedded, and copies the trigger to the user's clipboard. The -marker is one-shot: the first thing that consumes it also deletes it, so -it fires for **exactly one** user message no matter which chat that -message lands in. - -Three parallel consumers — whichever runs first wins: - -- `sessionStart` hook — fires on any NEW Cursor chat. -- `postToolUse` hook — fires after ANY tool call in an existing chat. -- **You, on the very first action of every turn** (see the mandatory - top-of-turn check below) — covers the gap when the user sends a purely - conversational message (e.g. "hi") that would not otherwise cause a tool - call. - -### MANDATORY top-of-turn marker check - -Before responding to **every** user message in a session that has **no -active task** (`agent-scope/active` is absent), your FIRST action MUST be: - -1. `Read` the path `agent-scope/.pending-onboarding`. If the file does not - exist, continue with your normal response — no further action. -2. If the file exists: - a. Delete it immediately using the `Delete` tool or `rm` via a single - shell command. This is critical — if you leave it there, the next - turn will re-trigger onboarding. - b. IGNORE the user's actual message (whatever they typed, even if it - was a specific instruction — they knew onboarding was pending when - they sent it). - c. Pivot to the **Task onboarding protocol** below: parse the marker - payload for a `=== USER TASK DESCRIPTION ===` block. If present, use - it as the brief and skip step 1 below. Otherwise, ask the user to - describe the task. - -Skip this check only when `agent-scope/active` exists (a task is already -set — no onboarding needed) or when this is a multi-turn conversation -already mid-onboarding. - -When you receive additional_context / initial context beginning with -`agent-scope: start task onboarding.` (from the hooks), treat it exactly -like the marker was present: STOP your current plan and run the onboarding -protocol below. - -1. **Get the task description.** - - If the trigger / marker contains a `=== USER TASK DESCRIPTION ===` - block (the `pnpm task start` flow), use that verbatim. DO NOT ask the - user to describe the task again — they already typed it into the CLI. - - Otherwise, ask them in plain chat: "OK, let's scope a new task. - Describe in detail what we're building or fixing — packages, - behaviours, tests, any files you already know about." Wait for - reply. -2. **Explore the codebase** with `Glob`, `Grep`, `SemanticSearch`, `Read`, - and the DKG SPARQL queries to find the files the task will touch. - Count matching files per candidate package. -3. **Draft a set of globs** that covers those files plus their tests. Err - slightly broad; prefer whole-package globs (`packages//**`) over - file-level globs; inherit `base`; always append `!**/secrets.*`, - `!**/.env*`. -4. **Propose the scope via a SINGLE `AskQuestion` — one question, two - options.** Write it like you're asking a coworker, not filling out a - form. 3 sentences max: one-line rephrase of the task, the scope you'd - propose (3–5 numbered globs), then "Sound good?" Example: - - > Refactor peer sync to use the new workspace auth. I'd scope it to: - > 1) `packages/agent/**` - > 2) `packages/core/**` - > 3) inherit `base` (standard build-artefact exemptions) - > - > Sound good? - - Options (IDs must match exactly — only these two): - - `go` — `"Yes, go with that"` - - `custom_instruction` — `"Type what you want instead"` - -5. **On `go`**, **YOU (the agent) run the command yourself** via the - Shell tool. The `afterShellExecution` hook has a narrow allowlist for - exactly this invocation: a canonical `pnpm task create ...` or - `node agent-scope/bin/task.mjs create ...` is the ONLY shape that - may persist new files under `agent-scope/tasks/` and `agent-scope/active`. - The command's `--allowed` flags must match your proposed scope verbatim - (any deviation is a protocol violation): - - ```bash - pnpm task create \ - --description "..." \ - --allowed "" \ - --allowed "" \ - --inherits base \ - --activate - ``` - - After the command succeeds, continue with the actual work in the same - turn. If the command fails, surface the error and re-ask via - AskQuestion instead of retrying blindly. - -6. **On `custom_instruction`**, ask the user in plain chat what they want - changed (packages, globs, task id, whatever), apply it to the draft, - then re-ask step 4 — still one short question with two options. - -### Plan-mode denial protocol (runs for every agent-scope denial) - -When agent-scope blocks a write or reverts a shell command, stop and -surface a short menu. Do NOT retry, rewrite, or work around the denial — -the defense-in-depth layers revert tracked changes and delete untracked -files in denied paths anyway. - -Every denial message starts with `agent-scope:` prose and contains a -fenced JSON block: +This repo ships a thin write-time guard called **agent-scope**. It is +**invisible by default**: it only activates when (a) at least one +`tasks:Task` is `in_progress` and attributed to your agent URI in the +local DKG, or (b) you try to touch one of the hardcoded protected paths +(always denied unless a human has enabled bootstrap mode). -``` - -{ ... JSON payload ... } - -``` +### Mental model + +The active scope at any moment is the **union of `tasks:scopedToPath` +globs** across every `in_progress` task whose `prov:wasAttributedTo` +matches the current agent URI. There is no separate manifest file, no +"active task" pointer, no `pnpm task` CLI — the DKG is the source of +truth and the guard reads it live (with a 5s cache). -The JSON shape (key fields only): +### Starting a task + +When the user gives you a piece of work and there is no covering +`in_progress` task, propose one and file it. A typical first call: ```ts -{ - humanSummary: string, // QUOTE this in your AskQuestion prompt - simpleOptions: [ // exactly two entries — SURFACE these - { id, label, action }, // the recommended option - { id: "custom_instruction", // free-text fallback (always present) - label: "Type what you want instead", - action: { kind: "custom" } } +dkg_add_task({ + taskUri: 'urn:dkg:task:peer-sync-auth', + title: 'Peer sync uses workspace auth', + status: 'in_progress', + assignee: '', + scopedToPath: [ + 'packages/agent/**', + 'packages/core/**', ], - recommendedOptionId: string, // matches simpleOptions[0].id - reason: "out-of-scope" | "protected" | "manifest-load-error" | "unknown", - deniedPath?, command?, activeTask, options: [ /* verbose — do NOT surface */ ], -} + description: 'Refactor peer-sync to consume the new workspace auth.' +}) +``` + +The guard cache picks up the new globs within ~5s; the next write to +those paths succeeds. To extend scope mid-work, file an additional +`in_progress` task (its globs union into the active scope) or re-issue +`dkg_add_task` for the same `taskUri` with the extended `scopedToPath`. + +When the work is done: + +```ts +dkg_update_task_status({ + taskUri: 'urn:dkg:task:peer-sync-auth', + status: 'done', +}) +``` + +The next scope read drops its globs automatically. + +### Hardcoded protected paths + +Always denied unless bootstrap mode is active: + +``` +.cursor/hooks/** .cursor/hooks.json .cursor/rules/agent-scope.mdc +.claude/hooks/** .claude/settings.json +agent-scope/lib/** agent-scope/.bootstrap-token +AGENTS.md GEMINI.md .cursorrules +``` + +Bootstrap mode is enabled by either `AGENT_SCOPE_BOOTSTRAP=1` in the +environment or `agent-scope/.bootstrap-token` existing on disk. **Both +must be set by the human, not by you.** If you need to modify a +protected file, STOP and ask the user to: + +``` +touch agent-scope/.bootstrap-token +``` + +…then remind them to `rm agent-scope/.bootstrap-token` when the +protected work is done. + +### Plan-mode denial protocol — MANDATORY when a write is blocked + +The denial message starts with an `agent-scope:` summary line and +contains a fenced JSON block: + +``` + +{ humanSummary, simpleOptions, recommendedOptionId, options, ... } + ``` +When you see this, STOP. Do not retry, rewrite, or work around the denial +— the defense-in-depth layers (post-shell hook) will revert tracked +changes and delete untracked files in denied paths anyway. + **Protocol:** 1. **Stop.** Do not retry via another tool or command form. @@ -345,21 +200,22 @@ The JSON shape (key fields only): your own reasoning (why you wanted to do this), plus a simple ask. Keep the whole prompt to 3 sentences max. Example: - > I'd like to edit `packages/evm-module/contracts/S.sol`, but the - > active task `sync` doesn't cover that file. I was trying to update - > the staking integration the PR depends on. Want me to add it and + > I'd like to edit `packages/evm-module/contracts/S.sol`, but no + > in-progress task covers that file. I was trying to update the + > staking integration the PR depends on. Want me to file a new + > in-progress task covering `packages/evm-module/contracts/**` and > continue? Do NOT surface the verbose `options` list. Do NOT add or rewrite options. 4. **Act on the user's choice** by matching the `action.kind`: - - `add_to_manifest` → edit `agent-scope/tasks/.json`, append patterns - to `allowed`, retry. - - `switch_task` → `pnpm task set `, retry. + - `new_in_progress_task` → call `dkg_add_task` with the suggested + `scopedToPath` (use `action.suggestedScopedToPath`) and + `status: "in_progress"`. The cache picks it up within ~5s; retry. - `bootstrap` → print `action.instruction` verbatim, wait for the user. Remind them to `rm agent-scope/.bootstrap-token` when done. - - `fix_manifest` → open the manifest, fix the error, validate. - - `clear_task` → `pnpm task clear`. + - `restart_daemon` / `configure_dkg` → print `action.instruction` + verbatim, wait for the user, retry. - `skip` → acknowledge, move on. - `cancel` → stop the turn, summarise. - `custom` → ask the user in plain chat "OK, what should I do instead?" @@ -368,39 +224,18 @@ The JSON shape (key fields only): 5. **Never invent options.** The `custom_instruction` entry is always in `simpleOptions` — route through it when neither side fits. -### Phrasing rules (onboarding AND denials) +### Phrasing rules -- Write like you're texting a coworker. One short question. One - recommendation. One "something else" option. +- Write like you're texting a coworker. One short question, one + recommendation, one "something else" option. - Never use ALL CAPS banners ("PROTECTED PATH", "STOP", "WARNING"). - Don't explain internal architecture in the prompt. The user doesn't - need to know about hooks or manifests to answer. + need to know about hooks or SPARQL queries to answer. - One sentence is enough to say why something is restricted. - No emoji unless the user uses them first. -### CLI quick reference - -``` -pnpm task start # user pastes description in CLI; agent proposes scope in chat -pnpm task create [flags] # non-interactive manifest build — agent runs this on approve (allowlisted) -pnpm task list | show | set | clear | check | audit | resolve -pnpm scope:status | scope:validate | scope:test -``` - -Manifest format is in `agent-scope/README.md`. Never edit a protected path -(`.cursor/hooks/**`, `.claude/hooks/**`, `agent-scope/lib/**`, `AGENTS.md`, -`GEMINI.md`, `.cursorrules`, etc.) without user-granted bootstrap. Never -improvise around a denial. - -The guard restricts **agent** actions only. Humans committing, pushing, or -editing through their own terminal are not restricted — there are no git -hooks and no CI enforcement layer. That distinction matters if a user edits -a protected file by hand: they can commit and push normally. - ### Cross-agent coverage -This system supports multiple agents: - | Agent | Enforcement | Wired via | |---|---|---| | Cursor | hard hooks (block writes physically) | `.cursor/hooks/`, `.cursor/hooks.json`, `.cursor/rules/agent-scope.mdc` | @@ -409,11 +244,10 @@ This system supports multiple agents: | Gemini CLI | soft | `GEMINI.md` — agent self-enforces | | Continue / Cline / older Cursor | soft | `.cursorrules` (legacy) | -Coworkers should run `pnpm task check-agent` after pulling to verify their -agent is wired up correctly. The same task manifests, same CLI, same -denial menus apply across all agents — only the enforcement layer differs. - -When you're running under Claude Code, the first time the user opens this -repo Claude Code will prompt them to **trust** the project hooks. They -must approve — that's how the enforcement attaches. +Run `pnpm scope:check-agent` after pulling to verify your agent is wired +up correctly. The same denial menu / DKG-derived scope applies across all +agents — only the enforcement layer differs. +When you're running under Claude Code, the first time the user opens +this repo Claude Code will prompt them to **trust** the project hooks. +They must approve — that's how the enforcement attaches. diff --git a/GEMINI.md b/GEMINI.md index 448851b49..2df659733 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -1,27 +1,28 @@ # Agent instructions for Gemini CLI -This repository uses an `agent-scope` task-permission system that limits -which files an AI agent may modify. The full instructions live in +This repository uses an `agent-scope` write-time guard that limits which +files an AI agent may modify. The full instructions live in [`AGENTS.md`](./AGENTS.md). Read that file first. Key points for Gemini: - You may **read** any file in the repo. -- You may **write** only files matching the active task's allowed globs - (when one is set). Run `pnpm task show` to see the active task; run - `pnpm task check ` to test a specific path. +- You may **write** only files matching the union of `tasks:scopedToPath` + globs across `in_progress` `tasks:Task` entities attributed to your + agent in the local DKG. Run `dkg_query_tasks` (or a SPARQL `SELECT`) + to see the active set. +- To start a task, call `dkg_add_task({ taskUri, status: "in_progress", + scopedToPath: [...], assignee: "", ... })`. To finish: + `dkg_update_task_status({ taskUri, status: "done" })`. There is no + separate manifest file — the DKG is the source of truth. - A set of system files is **always protected** regardless of task. See the "Hardcoded protected paths" section in `AGENTS.md`. -- When the user runs `pnpm task start`, a one-shot marker file at - `agent-scope/.pending-onboarding` is dropped. The marker already - embeds the user's task description in a `=== USER TASK DESCRIPTION ===` - block — do NOT ask them to describe it again. On your first action of - any new turn (when no task is active), check whether that marker exists; - if it does, delete it and run the onboarding protocol from `AGENTS.md`. - Gemini CLI does **not** have hard hook enforcement. You self-enforce by following the rules. The user trusts you to comply. -- Never invent menu options when surfacing a denial — pass through the - full `options` array verbatim and add your own reasoning + recommendation. +- When a denial fires, surface the menu in the denial JSON via the + user-question primitive your harness exposes — one short prompt, two + options (the recommendation and a free-text fallback). Never invent + options; route through `custom_instruction` if neither side fits. -For the full protocol, denial-handling flow, and CLI reference, see +For the full protocol, denial-handling flow, and DKG reference, see [`AGENTS.md`](./AGENTS.md). diff --git a/agent-scope/README.md b/agent-scope/README.md index 6fa9ea9a5..2b302474c 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -2,36 +2,49 @@ Keeps AI coding agents from editing files they shouldn't. -The agent can read the whole repo, but can only **write** the files your -current task covers. If it tries to touch something else, you get a short -question first — accept or tell it what to do instead. You're never -restricted. This only watches the agent. - -## Start a task - -```bash -pnpm task start +The agent can read the whole repo, but can only **write** the files +covered by an `in_progress` `tasks:Task` in the local DKG, attributed to +the agent. If it tries to touch something else, you get a short question +first — accept or tell it what to do instead. You're never restricted. +This only watches the agent. + +## Mental model + +agent-scope is a thin write-time guard. The "active scope" is the +**union of `tasks:scopedToPath` globs** across every `tasks:Task` that +is `in_progress` AND attributed (`prov:wasAttributedTo`) to the current +agent. There is no separate manifest file, no per-task JSON, no "active +task" pointer — the local DKG daemon is the source of truth and the +guard reads it live (with a 5-second cache). + +That means starting / extending / finishing a piece of work is exactly +the same call you'd make to log it in your project's task graph anyway: + +```ts +dkg_add_task({ + taskUri: 'urn:dkg:task:peer-sync-auth', + title: 'Peer sync uses workspace auth', + status: 'in_progress', + assignee: '', + scopedToPath: ['packages/agent/**', 'packages/core/**'], +}) + +// later … +dkg_update_task_status({ taskUri: '…', status: 'done' }) ``` -Type what you're working on, hit Enter. Then send any message in the chat -(`start working`, `hi`, whatever). The agent reads your description, looks -around the repo, and proposes which folders to include. Accept it and the -agent starts working inside that scope. - -When you're done: - -```bash -pnpm task clear -``` +There's nothing to install, no CLI to learn — the guard just observes +the graph. ## When the agent wants to go out of scope You'll see something like this in the chat: -> I'd like to edit `packages/foo/bar.ts`, but the active task doesn't cover -> it. Add that folder and keep going? +> I'd like to edit `packages/foo/bar.ts`, but no in-progress task covers +> that file. Want me to file a new in-progress task covering +> `packages/foo/**` and continue? > -> A) Yes, add it and continue +> A) Yes, file it and continue > B) Type what you want instead Pick A, or just type what you'd rather have. Nothing out of scope gets @@ -42,8 +55,8 @@ written without your OK. - **Cursor** and **Claude Code** — hard-blocked at the hook level, the agent physically can't write out-of-scope files. - **Codex CLI** and **Gemini CLI** — no hook API yet, so they read - `AGENTS.md` / `GEMINI.md` on session start and are expected to follow the - rules. Best-effort. + `AGENTS.md` / `GEMINI.md` on session start and are expected to follow + the rules. Best-effort. After you clone the repo, run this once to check your agent is wired up: @@ -51,18 +64,6 @@ After you clone the repo, run this once to check your agent is wired up: pnpm scope:check-agent ``` -## Commands - -```bash -pnpm task start # AI-guided onboarding (normal flow) -pnpm task show # what's active and what it covers -pnpm task list # all tasks, * marks active -pnpm task set # switch to an existing task -pnpm task check # will this file be allowed? -pnpm task audit # recent denials -pnpm task clear # turn protection off -``` - ## Editing agent-scope itself The files that run the guard are permanently off-limits to the agent — @@ -72,3 +73,30 @@ otherwise it could disable itself. To edit them, drop a token: touch agent-scope/.bootstrap-token # unlock rm agent-scope/.bootstrap-token # lock again ``` + +## Architecture (one-pager) + +| Layer | Cursor | Claude Code | Soft agents | +|---|---|---|---| +| Inject scope context at session start | `.cursor/hooks/session-start.mjs` | `.claude/hooks/session-start.mjs` | reads `AGENTS.md` / `GEMINI.md` | +| Block out-of-scope writes pre-tool | `.cursor/hooks/scope-guard.mjs` | `.claude/hooks/scope-guard.mjs` | self-enforce | +| Block destructive shell pre-execution | `.cursor/hooks/shell-precheck.mjs` | `.claude/hooks/shell-precheck.mjs` | self-enforce | +| Revert / delete leakage post-execution | `.cursor/hooks/shell-diff-check.mjs` | `.claude/hooks/shell-diff-check.mjs` | n/a | +| Bootstrap reminder per turn | n/a | `.claude/hooks/user-prompt-submit.mjs` | n/a | + +All hook implementations sit on the same shared library at +`agent-scope/lib/`: + +- `scope.mjs` — protected-path list, glob matching, `checkPath()`, + bootstrap detection. +- `dkg-source.mjs` — talks to the local DKG daemon, runs the SPARQL + query that resolves the active scope, caches results for 5s. +- `denial.mjs` — builds the human-readable summary + the structured + `simpleOptions` menu the agent surfaces via `AskQuestion`. +- `shell-parse.mjs` — pure parser for the shell pre/post hooks. +- `log.mjs` — appends decisions and denials to `agent-scope/logs/`. +- `check-agent.mjs` — diagnostics CLI. + +The guard restricts **agent** actions only. Humans committing, +pushing, or editing through their own terminal are not restricted — +there are no git hooks and no CI enforcement. diff --git a/agent-scope/bin/task.mjs b/agent-scope/bin/task.mjs deleted file mode 100755 index 766bcd5d9..000000000 --- a/agent-scope/bin/task.mjs +++ /dev/null @@ -1,494 +0,0 @@ -#!/usr/bin/env node -// Active-task management CLI. - -import { - readFileSync, writeFileSync, unlinkSync, existsSync, -} from 'node:fs'; -import { resolve } from 'node:path'; -import { createInterface } from 'node:readline'; -import { stdin as input, stdout as output } from 'node:process'; -import { - resolveRepoRoot, resolveActiveTaskId, loadTask, checkPath, - normalizeToRepoPath, listTasks, validateManifest, checkNodeVersion, - isBootstrapActive, -} from '../lib/scope.mjs'; -import { - buildOnboardingTrigger, - writeOnboardingMarker, - deleteOnboardingMarker, - copyToClipboard, -} from '../lib/onboarding.mjs'; -import { detectAgents, statusGlyph, summary } from '../lib/check-agent.mjs'; -import { createPrompter } from '../lib/prompter.mjs'; - -try { checkNodeVersion(); } -catch (e) { console.error(e.message); process.exit(3); } - -const root = resolveRepoRoot(); -const tasksDir = resolve(root, 'agent-scope/tasks'); -const activeFile = resolve(root, 'agent-scope/active'); -const logsFile = resolve(root, 'agent-scope/logs/denials.jsonl'); -const bootstrapToken = resolve(root, 'agent-scope/.bootstrap-token'); - -function bail(msg, code = 1) { console.error(`error: ${msg}`); process.exit(code); } - -function bootstrapWarning() { - if (isBootstrapActive(root)) { - console.log(''); - console.log('!! BOOTSTRAP MODE ACTIVE — hardcoded path protection is DISABLED.'); - console.log(`!! Remove when done: rm ${bootstrapToken}`); - console.log(''); - } -} - -// --------------------------------------------------------------------------- - -function list() { - const ids = listTasks(root); - if (!ids.length) { console.log('(no task manifests found)'); return; } - const { id: activeId } = resolveActiveTaskId(root); - for (const id of ids) { - let desc = ''; - try { desc = loadTask(root, id).description || ''; } - catch { desc = '(invalid manifest — run: task validate ' + id + ')'; } - const marker = id === activeId ? '* ' : ' '; - console.log(`${marker}${id.padEnd(28)} ${desc}`); - } - bootstrapWarning(); -} - -function show() { - const { id, source } = resolveActiveTaskId(root); - if (!id) { - console.log('No active task. Writes are unrestricted (except for protected paths).'); - console.log(`Set one with: node agent-scope/bin/task.mjs set `); - bootstrapWarning(); - return; - } - const task = loadTask(root, id); - console.log(`Active task: ${task.id}`); - console.log(`Description: ${task.description || '(none)'}`); - console.log(`Owner: ${task.owner || '(unassigned)'}`); - console.log(`Resolved via: ${source}`); - console.log(`Manifest: ${task.__path}`); - if (task.__inheritedFrom && task.__inheritedFrom.length) { - console.log(`Inherits: ${task.__inheritedFrom.join(', ')}`); - } - console.log(''); - console.log('Allowed patterns:'); - for (const p of task.allowed || []) console.log(` ${p.startsWith('!') ? '[deny] ' : ' '}${p}`); - if (task.exemptions && task.exemptions.length) { - console.log('Exemptions:'); - for (const p of task.exemptions) console.log(` ${p.startsWith('!') ? '[deny] ' : ' '}${p}`); - } - if (task.notes) { - console.log(''); - console.log('Notes:'); - console.log(` ${task.notes.replace(/\n/g, '\n ')}`); - } - bootstrapWarning(); -} - -function set(id) { - if (!id) bail('usage: task set '); - loadTask(root, id); - writeFileSync(activeFile, `${id}\n`, 'utf8'); - console.log(`Active task set: ${id}`); -} - -function clear() { - if (existsSync(activeFile)) unlinkSync(activeFile); - // Also clear any pending onboarding marker — if the user ran `pnpm - // task start` and then abandons the flow, the marker should not - // linger and keep triggering onboarding on future messages. - deleteOnboardingMarker(root); - console.log('Active task cleared. Writes are unrestricted (except for protected paths).'); -} - -function check(p) { - if (!p) bail('usage: task check '); - const { id } = resolveActiveTaskId(root); - const task = id ? loadTask(root, id) : null; - const rel = normalizeToRepoPath(root, p); - const decision = checkPath(task, rel, root); - console.log(`${decision.padEnd(9)} ${rel}${id ? ` [task: ${id}]` : ''}`); - if (decision === 'deny' || decision === 'protected') process.exit(1); -} - -async function init(id) { - if (!id) bail('usage: task init '); - if (!/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(id)) { - bail(`invalid id: ${id} (must match /^[a-z0-9][a-z0-9-_.]{0,63}$/)`); - } - const manifestPath = resolve(tasksDir, `${id}.json`); - if (existsSync(manifestPath)) bail(`manifest already exists: ${manifestPath}`); - - const rl = createInterface({ input, output, terminal: false }); - const buffered = []; - const waiters = []; - let closed = false; - rl.on('line', line => { - if (waiters.length) waiters.shift()(line); - else buffered.push(line); - }); - rl.on('close', () => { - closed = true; - while (waiters.length) waiters.shift()(''); - }); - const ask = (q) => new Promise(resolve => { - output.write(q); - if (buffered.length) return resolve(buffered.shift()); - if (closed) return resolve(''); - waiters.push(resolve); - }); - - try { - const description = (await ask('Description (one line): ')).trim(); - const owner = (await ask('Owner (free-form, blank ok): ')).trim(); - const inheritsStr = (await ask('Inherit from (comma-separated task ids, blank for [base]): ')).trim(); - const inherits = inheritsStr - ? inheritsStr.split(',').map(s => s.trim()).filter(Boolean) - : (listTasks(root).includes('base') ? ['base'] : []); - output.write('\n'); - output.write('Enter allowed glob patterns, one per line. Blank line to finish.\n'); - output.write('Tip: prefix a pattern with ! to explicitly deny (e.g. !**/secrets.*)\n'); - const allowed = []; - for (;;) { - const line = (await ask('allowed> ')).trim(); - if (!line) break; - allowed.push(line); - } - if (allowed.length === 0 && inherits.length === 0) { - rl.close(); - bail('at least one allowed pattern is required (unless you inherit from another task)'); - } - - output.write('\n'); - output.write('Enter additional exemption patterns. Blank to finish.\n'); - const exemptions = []; - for (;;) { - const line = (await ask('exempt > ')).trim(); - if (!line) break; - exemptions.push(line); - } - const notes = (await ask('Notes (blank ok): ')).trim(); - - const manifest = { - id, - description: description || undefined, - owner: owner || undefined, - created: new Date().toISOString(), - inherits: inherits.length ? inherits : undefined, - allowed: allowed.length ? allowed : undefined, - exemptions: exemptions.length ? exemptions : undefined, - notes: notes || undefined, - }; - const cleaned = Object.fromEntries(Object.entries(manifest).filter(([,v]) => v !== undefined)); - const errs = validateManifest(cleaned, id); - if (errs.length) { rl.close(); bail(`invalid manifest:\n - ${errs.join('\n - ')}`); } - - writeFileSync(manifestPath, JSON.stringify(cleaned, null, 2) + '\n', 'utf8'); - console.log(''); - console.log(`Created ${manifestPath}`); - console.log(`Activate with: node agent-scope/bin/task.mjs set ${id}`); - } finally { rl.close(); } -} - -// --------------------------------------------------------------------------- -// Task onboarding — `pnpm task start` -// --------------------------------------------------------------------------- -// -// Single flow: the CLI asks "What are you working on?" (one short prompt, -// single Enter to submit, multi-line pastes captured in full), then drops -// a one-shot marker file at `agent-scope/.pending-onboarding` embedding -// the user's description plus the Task onboarding protocol. The next -// message the user sends in ANY chat (new or existing) makes the agent -// read the description, explore the repo, and propose a scope via a -// plan-mode AskQuestion. On approval the agent runs `pnpm task create` -// itself via the allowlist. -// -// Requires an interactive terminal. For non-interactive / CI use call -// `pnpm task create --description ... --allowed ... --activate` -// directly — that path has always been scripted-friendly. -// --------------------------------------------------------------------------- - -async function start() { - const { id: activeId } = resolveActiveTaskId(root); - if (activeId) { - console.log(`You already have a task going: ${activeId}`); - console.log(`Run \`pnpm task clear\` to drop it, or \`pnpm task show\` to see it.`); - bootstrapWarning(); - return; - } - - if (!process.stdin.isTTY) { - console.error('Run `pnpm task start` in a real terminal — it asks you a question.'); - console.error('For CI or scripts, use `pnpm task create` directly.'); - process.exit(2); - } - - const prompter = createPrompter(); - let description = ''; - try { - console.log('What are you working on?'); - console.log(''); - description = await prompter.askPasteableDescription('> '); - } finally { - prompter.close(); - } - - const trimmed = description.trim(); - if (!trimmed || trimmed.length < 10) { - bail('Give me at least a sentence so I know what you\'re doing.'); - } - - const trigger = buildOnboardingTrigger({ description: trimmed }); - writeOnboardingMarker(root, trigger); - const clip = copyToClipboard(trigger); - - console.log(''); - console.log('Got it. Send any message in chat (e.g. `start working`).'); - console.log('I\'ll read what you wrote, look around the repo, and ask you'); - console.log('to accept a scope before I touch anything.'); - if (clip.ok) { - console.log(''); - console.log('(Also copied to your clipboard, just in case.)'); - } - bootstrapWarning(); -} - -function parseCreateArgs(argv) { - const out = { - id: null, - description: null, - owner: null, - notes: null, - inherits: null, - allowed: [], - exemptions: [], - activate: false, - force: false, - }; - for (let i = 0; i < argv.length; i++) { - const a = argv[i]; - const next = () => { - const v = argv[++i]; - if (v === undefined) bail(`missing value for ${a}`); - return v; - }; - switch (a) { - case '--description': case '-d': out.description = next(); break; - case '--owner': out.owner = next(); break; - case '--notes': out.notes = next(); break; - case '--inherits': out.inherits = next().split(',').map(s => s.trim()).filter(Boolean); break; - case '--allowed': case '-a': out.allowed.push(next()); break; - case '--exemption': case '-e': out.exemptions.push(next()); break; - case '--activate': out.activate = true; break; - case '--force': out.force = true; break; - case '-h': case '--help': - console.log([ - 'usage: task create [flags]', - '', - ' --description, -d short description', - ' --owner free-form owner tag', - ' --notes multi-line notes (use \\n)', - ' --inherits comma-separated parent task ids', - ' --allowed, -a add an allowed pattern (repeatable)', - ' --exemption, -e add an exemption pattern (repeatable)', - ' --activate set as active task after creation', - ' --force overwrite an existing manifest', - ].join('\n')); - process.exit(0); - default: - if (a.startsWith('-')) bail(`unknown flag: ${a}`); - if (!out.id) { out.id = a; break; } - bail(`unexpected positional arg: ${a}`); - } - } - return out; -} - -function create(argv) { - const opts = parseCreateArgs(argv); - if (!opts.id) bail('usage: task create --description "..." --allowed "" [...]'); - if (!/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(opts.id)) { - bail(`invalid id: ${opts.id} (must match /^[a-z0-9][a-z0-9-_.]{0,63}$/)`); - } - const manifestPath = resolve(tasksDir, `${opts.id}.json`); - if (existsSync(manifestPath) && !opts.force) { - bail(`manifest already exists: ${manifestPath}\n (pass --force to overwrite)`); - } - const inherits = opts.inherits !== null - ? opts.inherits - : (listTasks(root).includes('base') && opts.id !== 'base' ? ['base'] : []); - - if (opts.allowed.length === 0 && inherits.length === 0) { - bail('at least one --allowed pattern is required (unless --inherits)'); - } - - const manifest = { - id: opts.id, - description: opts.description || undefined, - owner: opts.owner || undefined, - created: new Date().toISOString(), - inherits: inherits.length ? inherits : undefined, - allowed: opts.allowed.length ? opts.allowed : undefined, - exemptions: opts.exemptions.length ? opts.exemptions : undefined, - notes: opts.notes || undefined, - }; - const cleaned = Object.fromEntries(Object.entries(manifest).filter(([,v]) => v !== undefined)); - const errs = validateManifest(cleaned, opts.id); - if (errs.length) bail(`invalid manifest:\n - ${errs.join('\n - ')}`); - - writeFileSync(manifestPath, JSON.stringify(cleaned, null, 2) + '\n', 'utf8'); - console.log(`Created ${manifestPath}`); - - if (opts.activate) { - loadTask(root, opts.id); - writeFileSync(activeFile, `${opts.id}\n`, 'utf8'); - // A successful activate = "the agent processed the onboarding - // description and is now working." The pending-onboarding marker - // has served its purpose and must be cleared so future turns don't - // keep re-injecting the onboarding protocol into context. - deleteOnboardingMarker(root); - console.log(`Active task set: ${opts.id}`); - } else { - console.log(`Activate with: pnpm task set ${opts.id}`); - } - bootstrapWarning(); -} - -function validate(one) { - const ids = one ? [one] : listTasks(root); - if (!ids.length) { console.log('(no manifests found)'); return; } - let failed = 0; - for (const id of ids) { - const file = resolve(tasksDir, `${id}.json`); - if (!existsSync(file)) { console.error(`missing: ${file}`); failed++; continue; } - let raw, parsed; - try { raw = readFileSync(file, 'utf8'); } catch (e) { console.error(`${id}: cannot read (${e.message})`); failed++; continue; } - try { parsed = JSON.parse(raw); } catch (e) { console.error(`${id}: invalid JSON (${e.message})`); failed++; continue; } - const errs = validateManifest(parsed, id); - if (errs.length) { - failed++; - console.error(`${id}: INVALID`); - for (const err of errs) console.error(` - ${err}`); - continue; - } - try { - loadTask(root, id); - console.log(`${id}: ok`); - } catch (e) { - failed++; - console.error(`${id}: INVALID (inheritance)`); - console.error(` - ${e.message}`); - } - } - if (failed) process.exit(1); -} - -function audit(args) { - let limit = 50; - for (let i = 0; i < args.length; i++) { - const a = args[i]; - if (a === '--since' || a === '-n') { limit = parseInt(args[++i], 10) || 50; } - else if (a === '-h' || a === '--help') { console.log('usage: task audit [--since N]'); return; } - } - if (!existsSync(logsFile)) { console.log('(no denials logged)'); return; } - const lines = readFileSync(logsFile, 'utf8').split('\n').filter(Boolean); - const tail = lines.slice(-limit); - for (const line of tail) { - try { - const r = JSON.parse(line); - console.log(`${r.ts} ${(r.event || '-').padEnd(26)} ${(r.task || '-').padEnd(20)} ${r.path || r.command || ''}`); - } catch { - console.log(line); - } - } - console.log(`\n(${tail.length} of ${lines.length} entries)`); -} - -function checkAgent() { - console.log('agent-scope: checking per-agent setup'); - console.log(''); - const results = detectAgents(root); - for (const r of results) { - console.log(`${r.name} ${statusGlyph(r.status)}`); - console.log(` enforcement: ${r.enforcement}`); - for (const d of r.details) console.log(d); - if (r.setup.length) { - console.log(' setup:'); - for (const s of r.setup) console.log(s); - } - console.log(''); - } - const c = summary(results); - console.log( - `Summary: ${c.ok} hard-enforced, ${c.partial} soft-rule only, ` + - `${c.warn} need attention, ${c.missing} not configured.` - ); - if (c.warn > 0) { - console.log(''); - console.log('Action: at least one agent has issues — see [! check] entries above.'); - process.exit(1); - } - console.log(''); - console.log('Tip: run `pnpm task show` to see the active task scope (if any).'); -} - -function resolveDebug() { - console.log(`repo root: ${root}`); - console.log(`env: AGENT_SCOPE_TASK=${process.env.AGENT_SCOPE_TASK || '(unset)'}`); - console.log(` AGENT_SCOPE_BOOTSTRAP=${process.env.AGENT_SCOPE_BOOTSTRAP || '(unset)'}`); - console.log(`bootstrap: ${isBootstrapActive(root) ? 'ACTIVE' : 'inactive'} (token: ${existsSync(bootstrapToken) ? 'present' : 'absent'})`); - const activeStr = existsSync(activeFile) ? readFileSync(activeFile, 'utf8').trim() : '(none)'; - console.log(`file: ${activeFile} → ${activeStr}`); - const { id, source } = resolveActiveTaskId(root); - console.log(`resolved: ${id || '(none)'} (source: ${source})`); -} - -// --------------------------------------------------------------------------- - -const [cmd, ...rest] = process.argv.slice(2); -try { - switch (cmd) { - case 'list': list(); break; - case 'show': - case undefined: - case null: - case '': show(); break; - case 'set': set(rest[0]); break; - case 'clear': clear(); break; - case 'check': check(rest[0]); break; - case 'init': await init(rest[0]); break; - case 'start': await start(); break; - case 'create': create(rest); break; - case 'validate': validate(rest[0]); break; - case 'audit': audit(rest); break; - case 'resolve': resolveDebug(); break; - case 'check-agent': - case 'check-agents': checkAgent(); break; - case '-h': case '--help': case 'help': - console.log([ - 'usage: task [args]', - '', - ' start describe the task; the agent proposes a scope in chat', - ' list list available task manifests', - ' show show the active task and its scope', - ' set set the active task', - ' clear clear the active task', - ' check check a path against the active task', - ' init create a new task manifest interactively', - ' create [...] create a manifest non-interactively (flags)', - ' validate [] validate one or all manifests', - ' audit [--since N] show recent denials from the audit log', - ' resolve debug: show how the active task is resolved', - ' check-agent verify per-agent setup (Cursor/Claude Code/Codex/...)', - ].join('\n')); - break; - default: - bail(`unknown command: ${cmd}\nrun: task --help`, 2); - } -} catch (e) { - bail(e.message); -} diff --git a/agent-scope/lib/check-agent.mjs b/agent-scope/lib/check-agent.mjs index 8480f00d1..3853d50fb 100644 --- a/agent-scope/lib/check-agent.mjs +++ b/agent-scope/lib/check-agent.mjs @@ -1,26 +1,23 @@ -// `pnpm task check-agent` — verify agent-scope is wired up correctly for -// each supported agent on this machine. Pure data; presentation is in -// agent-scope/bin/task.mjs. +// `pnpm scope:check-agent` — verify agent-scope is wired up correctly for +// each supported agent on this machine. This is the post-`git pull` +// sanity command. Coworkers run it, see a per-agent green/yellow/red, +// and know what (if anything) they need to do. // -// This is the post-`git pull` sanity command. Coworkers run it, see a -// per-agent green/yellow/red, and know what (if anything) they need to do. - -import { existsSync, readFileSync, statSync } from 'node:fs'; -import { resolve } from 'node:path'; -import { spawnSync } from 'node:child_process'; - -// One descriptor per agent. Each .check() returns { status, details } where -// status is 'ok' | 'warn' | 'missing' | 'partial'. Soft-rule-only agents -// always return 'partial' to make it clear they have no hard enforcement. +// Pure data + a small CLI driver at the bottom (so this file is also the +// executable). Library exports: detectAgents, statusGlyph, summary. // +// Status values per agent: // ok → fully wired up; hard enforcement on // partial → instruction file present; agent must self-enforce // warn → wired up but something is questionable (e.g. hook not +x) // missing → not configured at all // // We never return 'fail' because a missing agent is the normal state for -// users who don't use that agent. The CLI only exits non-zero if the -// active task can't be loaded. +// users who don't use that agent. + +import { existsSync, statSync } from 'node:fs'; +import { resolve, dirname } from 'node:path'; +import { fileURLToPath } from 'node:url'; export function detectAgents(root) { return [ @@ -63,8 +60,6 @@ function cursorAgent(root) { 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', - 'post-tool-use.mjs', - 'stop.mjs', ]; for (const f of requiredHooks) { const p = resolve(hooksDir, f); @@ -85,7 +80,7 @@ function cursorAgent(root) { if (existsSync(rule)) out.details.push(' ✓ .cursor/rules/agent-scope.mdc present'); else { - out.details.push(' ! .cursor/rules/agent-scope.mdc missing — agent will lack onboarding protocol'); + out.details.push(' ! .cursor/rules/agent-scope.mdc missing — agent will lack the denial protocol'); out.status = out.status === 'ok' ? 'warn' : out.status; out.setup.push(' • Pull the latest commit — .cursor/rules/agent-scope.mdc should be tracked.'); } @@ -127,7 +122,6 @@ function claudeCodeAgent(root) { 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', - 'post-tool-use.mjs', 'user-prompt-submit.mjs', ]; for (const f of requiredHooks) { @@ -149,7 +143,7 @@ function claudeCodeAgent(root) { if (existsSync(claudeMd)) out.details.push(' ✓ CLAUDE.md present'); else { - out.details.push(' ! CLAUDE.md missing — agent will lack onboarding protocol'); + out.details.push(' ! CLAUDE.md missing — agent will lack the denial protocol'); out.status = out.status === 'ok' ? 'warn' : out.status; } @@ -250,13 +244,13 @@ function legacyAgent(root) { function isExecutable(p) { try { const m = statSync(p).mode; - // owner / group / other execute bits return Boolean(m & 0o111); } catch { return false; } } // --------------------------------------------------------------------------- // Aggregate +// --------------------------------------------------------------------------- export function statusGlyph(s) { switch (s) { @@ -273,3 +267,38 @@ export function summary(results) { for (const r of results) counts[r.status] = (counts[r.status] || 0) + 1; return counts; } + +// --------------------------------------------------------------------------- +// CLI +// --------------------------------------------------------------------------- + +function repoRootFromHere() { + const here = dirname(fileURLToPath(import.meta.url)); + return resolve(here, '..', '..'); +} + +function runCli() { + const root = process.env.AGENT_SCOPE_ROOT || repoRootFromHere(); + const results = detectAgents(root); + const counts = summary(results); + + console.log(`agent-scope: agent wiring check (root: ${root})\n`); + for (const r of results) { + console.log(`${statusGlyph(r.status)} ${r.name} — ${r.enforcement}`); + for (const d of r.details) console.log(d); + if (r.setup.length) { + console.log(' Setup:'); + for (const s of r.setup) console.log(s); + } + console.log(''); + } + console.log( + `Summary: ${counts.ok} ok · ${counts.partial} soft · ${counts.warn} check · ${counts.missing} missing`, + ); +} + +const isMain = (() => { + try { return import.meta.url === `file://${process.argv[1]}` || import.meta.url.endsWith(process.argv[1] || ''); } + catch { return false; } +})(); +if (isMain) runCli(); diff --git a/agent-scope/lib/check-agent.test.mjs b/agent-scope/lib/check-agent.test.mjs index 7c1ffe0d4..b2fb78716 100644 --- a/agent-scope/lib/check-agent.test.mjs +++ b/agent-scope/lib/check-agent.test.mjs @@ -10,7 +10,7 @@ import { detectAgents, summary, statusGlyph } from './check-agent.mjs'; function makeRepo() { const root = mkdtempSync(join(tmpdir(), 'agent-scope-checkagent-')); - mkdirSync(join(root, 'agent-scope/tasks'), { recursive: true }); + mkdirSync(join(root, 'agent-scope/lib'), { recursive: true }); return root; } @@ -34,15 +34,23 @@ test('detectAgents: empty repo → all missing', () => { } finally { rmSync(root, { recursive: true, force: true }); } }); +const CURSOR_HOOKS = [ + 'session-start.mjs', 'scope-guard.mjs', + 'shell-precheck.mjs', 'shell-diff-check.mjs', +]; +const CLAUDE_HOOKS = [ + 'session-start.mjs', 'scope-guard.mjs', + 'shell-precheck.mjs', 'shell-diff-check.mjs', + 'user-prompt-submit.mjs', +]; + test('detectAgents: full Cursor wiring → ok', () => { const root = makeRepo(); try { mkdirSync(join(root, '.cursor/rules'), { recursive: true }); writeFileSync(join(root, '.cursor/hooks.json'), '{}'); writeFileSync(join(root, '.cursor/rules/agent-scope.mdc'), ''); - for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs', 'stop.mjs']) { - touchHook(root, '.cursor', f); - } + for (const f of CURSOR_HOOKS) touchHook(root, '.cursor', f); const cursor = detectAgents(root).find(a => a.name === 'Cursor'); assert.equal(cursor.status, 'ok', JSON.stringify(cursor, null, 2)); } finally { rmSync(root, { recursive: true, force: true }); } @@ -54,9 +62,7 @@ test('detectAgents: Cursor hook not executable → warn', () => { mkdirSync(join(root, '.cursor/rules'), { recursive: true }); writeFileSync(join(root, '.cursor/hooks.json'), '{}'); writeFileSync(join(root, '.cursor/rules/agent-scope.mdc'), ''); - for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs', 'stop.mjs']) { - touchHook(root, '.cursor', f); - } + for (const f of CURSOR_HOOKS) touchHook(root, '.cursor', f); chmodSync(join(root, '.cursor/hooks/scope-guard.mjs'), 0o644); const cursor = detectAgents(root).find(a => a.name === 'Cursor'); assert.equal(cursor.status, 'warn'); @@ -70,9 +76,7 @@ test('detectAgents: full Claude Code wiring → ok', () => { mkdirSync(join(root, '.claude'), { recursive: true }); writeFileSync(join(root, '.claude/settings.json'), '{}'); writeFileSync(join(root, 'CLAUDE.md'), ''); - for (const f of ['session-start.mjs', 'scope-guard.mjs', 'shell-precheck.mjs', 'shell-diff-check.mjs', 'post-tool-use.mjs', 'user-prompt-submit.mjs']) { - touchHook(root, '.claude', f); - } + for (const f of CLAUDE_HOOKS) touchHook(root, '.claude', f); const cc = detectAgents(root).find(a => a.name === 'Claude Code'); assert.equal(cc.status, 'ok', JSON.stringify(cc, null, 2)); } finally { rmSync(root, { recursive: true, force: true }); } diff --git a/agent-scope/lib/denial.mjs b/agent-scope/lib/denial.mjs index 789019fcf..f00adbad0 100644 --- a/agent-scope/lib/denial.mjs +++ b/agent-scope/lib/denial.mjs @@ -10,9 +10,16 @@ // action plus a free-text fallback. Never surface the full `options` // list to the user; it exists for audit / back-compat / tests. // +// Source-of-truth model: scope is now derived from the local DKG daemon +// (in-progress `tasks:Task` entities attributed to this agent — see +// `agent-scope/lib/dkg-source.mjs`). There are no local task manifests +// anymore, so the only legitimate way to extend scope is for the agent +// to file a NEW in-progress task via `dkg_add_task` covering the path +// they need. The denial menus reflect that. +// // Zero IO, zero deps. Pure functions; unit-testable. -import { listTasks, loadTask, checkPath, PROTECTED_PATTERNS } from './scope.mjs'; +import { PROTECTED_PATTERNS } from './scope.mjs'; export const DENIAL_FENCE_START = ''; export const DENIAL_FENCE_END = ''; @@ -21,9 +28,6 @@ export const DENIAL_FENCE_END = ''; // Suggestion heuristics // --------------------------------------------------------------------------- -// Propose a single representative glob for a denied path. Conservative: covers -// the immediate parent directory's subtree. Callers can suggest tighter globs -// interactively if the user prefers. export function suggestGlob(relPath) { if (typeof relPath !== 'string' || !relPath) return null; const clean = relPath.replace(/\/+$/, ''); @@ -33,8 +37,6 @@ export function suggestGlob(relPath) { return `${dir}/**`; } -// Propose a tighter glob targeting the exact basename stem (same directory, -// any extension). Useful when the agent is likely to touch sibling files. export function suggestTightGlob(relPath) { if (typeof relPath !== 'string' || !relPath) return null; const clean = relPath.replace(/\/+$/, ''); @@ -47,114 +49,84 @@ export function suggestTightGlob(relPath) { return dir ? `${dir}/${stem}*` : `${stem}*`; } -// Find other task manifests whose scope already covers the denied path. -// Skips the currently-active task. Protected paths have no alternatives. -export function findAlternativeTasks(relPath, root, excludeTaskId = null) { - if (!relPath || !root) return []; - const out = []; - let ids = []; - try { ids = listTasks(root); } catch { return []; } - for (const id of ids) { - if (id === excludeTaskId) continue; - let t; - try { t = loadTask(root, id); } catch { continue; } - let d; - try { d = checkPath(t, relPath, root); } catch { continue; } - if (d === 'allow' || d === 'exempt') { - out.push({ id, description: t.description || '' }); - } - } - return out; -} - // --------------------------------------------------------------------------- // Option menus // --------------------------------------------------------------------------- -// A free-text fallback. Included in every menu so the user can bypass the -// presets entirely. When picked, the agent asks the user to describe what to -// do next as a regular chat message. const CUSTOM_OPTION = { id: 'custom_instruction', label: 'Let me type my own instruction', action: { kind: 'custom' }, }; -// Shorter version used in the two-option `simpleOptions` surface — this is -// the label the user sees in the plan-mode AskQuestion, so it should read -// like a chat button, not a legal clause. const CUSTOM_OPTION_SIMPLE = { id: 'custom_instruction', label: 'Type what you want instead', action: { kind: 'custom' }, }; -// Short, natural-language label for the recommended action. The full -// `options` array keeps its verbose labels (back-compat + audit), but the -// plan-mode AskQuestion uses these casual ones so the prompt reads like a -// human wrote it. Falls back to the verbose label if the id is unknown. -function simpleLabelFor(optionId, { deniedPath, activeTaskId, altTaskId } = {}) { - if (optionId === 'add_file') return 'Add this file to the task and try again'; - if (optionId === 'add_glob') return 'Add this folder to the task and try again'; - if (optionId === 'bootstrap') return 'Yes, unlock it so I can do this edit'; - if (optionId === 'cancel') return 'Skip it'; - if (optionId === 'skip') return 'Skip and keep working on other things'; - if (optionId === 'fix_manifest') return 'Open the task file so I can fix it'; - if (optionId === 'clear_task') return 'Clear the active task for now'; - if (optionId === 'acknowledge') return 'OK, keep going'; - if (optionId && optionId.startsWith('switch_task_') && altTaskId) { - return `Switch to task "${altTaskId}" and try again`; - } +function simpleLabelFor(optionId) { + if (optionId === 'new_task_glob') return 'File a new in-progress task covering this folder and continue'; + if (optionId === 'new_task_file') return 'File a new in-progress task covering this file and continue'; + if (optionId === 'bootstrap') return 'Yes, unlock it so I can do this edit'; + if (optionId === 'cancel') return 'Skip it'; + if (optionId === 'skip') return 'Skip and keep working on other things'; + if (optionId === 'restart_daemon') return 'Tell me how to restart the DKG daemon'; + if (optionId === 'configure_dkg') return 'Tell me how to set up the DKG project / agent'; + if (optionId === 'acknowledge') return 'OK, keep going'; return null; } -// Build the two-option `simpleOptions` array for plan-mode AskQuestion. -// It always contains exactly two entries: the recommended option (with a -// short human label) and a free-text fallback. function buildSimpleOptions(fullOptions, recommendedId) { - const rec = fullOptions.find(o => o.id === recommendedId) || fullOptions[0]; + const rec = fullOptions.find((o) => o.id === recommendedId) || fullOptions[0]; if (!rec) return [CUSTOM_OPTION_SIMPLE]; - const altTaskId = rec.id.startsWith('switch_task_') ? rec.id.slice('switch_task_'.length) : null; - const label = simpleLabelFor(rec.id, { altTaskId }) || rec.label; + const label = simpleLabelFor(rec.id) || rec.label; return [ { id: rec.id, label, action: rec.action }, CUSTOM_OPTION_SIMPLE, ]; } -// Menu for out-of-scope write denials (path is in the repo but not in scope). -export function buildOutOfScopeOptions({ deniedPath, activeTaskId, alternatives }) { +export function buildOutOfScopeOptions({ deniedPath, activeTaskUris }) { + const folderGlob = suggestGlob(deniedPath); + const uris = Array.isArray(activeTaskUris) ? activeTaskUris : []; + const taskList = uris.length ? uris.join(', ') : 'none'; const opts = [ { - id: 'add_file', - label: `Add "${deniedPath}" to ${activeTaskId}'s manifest`, - action: { kind: 'add_to_manifest', task: activeTaskId, patterns: [deniedPath] }, + id: 'new_task_glob', + label: `File a new in-progress task covering "${folderGlob}"`, + action: { + kind: 'new_in_progress_task', + suggestedScopedToPath: [folderGlob], + suggestedTitle: `Extend scope to ${folderGlob}`, + rationale: `Existing in-progress task${uris.length === 1 ? '' : 's'} (${taskList}) doesn't cover ${deniedPath}.`, + }, }, { - id: 'add_glob', - label: `Add "${suggestGlob(deniedPath)}" to ${activeTaskId}'s manifest`, - action: { kind: 'add_to_manifest', task: activeTaskId, patterns: [suggestGlob(deniedPath)] }, + id: 'new_task_file', + label: `File a new in-progress task covering exactly "${deniedPath}"`, + action: { + kind: 'new_in_progress_task', + suggestedScopedToPath: [deniedPath], + suggestedTitle: `Extend scope to ${deniedPath}`, + rationale: `Existing in-progress task${uris.length === 1 ? '' : 's'} (${taskList}) doesn't cover ${deniedPath}.`, + }, + }, + { + id: 'skip', + label: 'Skip this edit, keep working on in-scope files', + action: { kind: 'skip' }, + }, + { + id: 'cancel', + label: 'Cancel this turn — the edit should not happen', + action: { kind: 'cancel' }, }, - ]; - if (Array.isArray(alternatives) && alternatives.length) { - for (const alt of alternatives.slice(0, 3)) { - opts.push({ - id: `switch_task_${alt.id}`, - label: `Switch active task to "${alt.id}"` + (alt.description ? ` — ${alt.description}` : ''), - action: { kind: 'switch_task', task: alt.id }, - }); - } - } - opts.push( - { id: 'skip', label: 'Skip this edit, keep working on in-scope files', action: { kind: 'skip' } }, - { id: 'cancel', label: 'Cancel this turn — the edit should not happen', action: { kind: 'cancel' } }, CUSTOM_OPTION, - ); + ]; return opts; } -// Classify a protected path so the denial prose can explain WHY that specific -// file is guarded, not just that it is. Keeps the menu copy concrete. export function classifyProtected(relPath) { if (!relPath || typeof relPath !== 'string') return { kind: 'unknown', role: 'protected file' }; if (relPath.startsWith('.cursor/hooks/') || relPath === '.cursor/hooks.json') { @@ -163,28 +135,21 @@ export function classifyProtected(relPath) { if (relPath === '.cursor/rules/agent-scope.mdc') { return { kind: 'cursor-rule', role: 'the rule that tells the agent to surface denial menus via AskQuestion' }; } + if (relPath.startsWith('.claude/hooks/') || relPath === '.claude/settings.json') { + return { kind: 'claude-hook', role: 'a Claude Code hook that enforces agent-scope in every session' }; + } if (relPath.startsWith('agent-scope/lib/')) { return { kind: 'scope-library', role: 'the shared enforcement library used by every hook' }; } - if (relPath.startsWith('agent-scope/bin/')) { - return { kind: 'scope-cli', role: 'the `pnpm task` CLI — if modified, the whole task workflow can be subverted' }; - } - if (relPath.startsWith('agent-scope/schema/')) { - return { kind: 'scope-schema', role: 'the JSON schema that validates every task manifest' }; - } - if (relPath.startsWith('agent-scope/tasks/')) { - return { kind: 'task-manifest', role: 'a task manifest — editing it would silently expand or shrink what agents can write' }; - } - if (relPath === 'agent-scope/active') { - return { kind: 'active-pointer', role: 'the active-task pointer — editing it would let the agent pick its own scope' }; - } if (relPath === 'agent-scope/.bootstrap-token') { return { kind: 'bootstrap-token', role: 'the bootstrap token itself — writing it would self-grant full access' }; } + if (relPath === 'AGENTS.md' || relPath === 'GEMINI.md' || relPath === '.cursorrules') { + return { kind: 'agent-instructions', role: 'the agent-instruction file the AI reads to learn how to behave in this repo' }; + } return { kind: 'unknown', role: 'a file on the hardcoded protected list' }; } -// Menu for protected-path denials — only the human can unlock. export function buildProtectedOptions({ deniedPath }) { return [ { @@ -209,44 +174,59 @@ export function buildProtectedOptions({ deniedPath }) { ]; } -// Menu for manifest load errors — the task file is broken. -export function buildLoadErrorOptions({ taskId, error }) { +export function buildResolutionErrorOptions({ reason }) { + if (reason === 'daemon-unreachable') { + return [ + { + id: 'restart_daemon', + label: 'Tell me how to restart the local DKG daemon', + action: { + kind: 'restart_daemon', + instruction: 'In your own terminal run:\n dkg start\n(or `pnpm -F @origintrail-official/dkg-cli start`).\nThen reply "go" and I\'ll re-check.', + }, + }, + { + id: 'skip', + label: 'Keep going in soft mode (only protected paths blocked)', + action: { kind: 'skip' }, + }, + { id: 'cancel', label: 'Cancel this turn', action: { kind: 'cancel' } }, + CUSTOM_OPTION, + ]; + } return [ { - id: 'fix_manifest', - label: `Open and fix agent-scope/tasks/${taskId}.json`, - action: { kind: 'fix_manifest', task: taskId, error }, + id: 'configure_dkg', + label: 'Tell me how to wire up the DKG project + agent for this workspace', + action: { + kind: 'configure_dkg', + instruction: 'Edit `.dkg/config.yaml` so it has both `contextGraph: ` and `agent.uri: ` populated, then reply "go". (Alternatively, export `DKG_PROJECT` and `DKG_AGENT_URI` for one-off runs.)', + }, }, { - id: 'clear_task', - label: 'Clear the active task for now (pnpm task clear)', - action: { kind: 'clear_task' }, + id: 'skip', + label: 'Keep going in soft mode (only protected paths blocked)', + action: { kind: 'skip' }, }, { id: 'cancel', label: 'Cancel this turn', action: { kind: 'cancel' } }, CUSTOM_OPTION, ]; } -// Pick a sensible default for the highlighted option. Agents are instructed -// to respect this when surfacing the menu via AskQuestion, but it's only a -// recommendation — the user is always free to choose anything. function recommendFor(reason, options) { - const ids = new Set(options.map(o => o.id)); + const ids = new Set(options.map((o) => o.id)); if (reason === 'out-of-scope') { - if (ids.has('add_glob')) return 'add_glob'; - if (ids.has('add_file')) return 'add_file'; - } - if (reason === 'protected') { - return 'cancel'; // safest default; user opts into bootstrap deliberately - } - if (reason === 'manifest-load-error') { - if (ids.has('fix_manifest')) return 'fix_manifest'; + if (ids.has('new_task_glob')) return 'new_task_glob'; + if (ids.has('new_task_file')) return 'new_task_file'; } + if (reason === 'protected') return 'cancel'; + if (reason === 'daemon-unreachable') return 'restart_daemon'; + if (reason === 'configuration-error') return 'configure_dkg'; return options[0]?.id || null; } // --------------------------------------------------------------------------- -// Full denial message builders (prose + structured block) +// Full denial message builders // --------------------------------------------------------------------------- function wrapStructured(payload) { @@ -257,10 +237,6 @@ function wrapStructured(payload) { ].join('\n'); } -// Emit a short human-readable summary and append the machine-readable JSON -// block. Agents are instructed to quote `humanSummary` verbatim in their -// AskQuestion prompt and offer only the two `simpleOptions` — never the -// full `options` list. function render(summary, structured) { return [ `agent-scope: ${summary}`, @@ -269,7 +245,6 @@ function render(summary, structured) { ].join('\n'); } -// Build a preToolUse denial message. export function buildPreToolUseDenial({ tool, deniedPath, decision, task, taskId, root, }) { @@ -290,6 +265,7 @@ export function buildPreToolUseDenial({ protectedKind: classification.kind, protectedRole: classification.role, activeTask: taskId || null, + activeTaskUris: (task && task.dkgTaskUris) || [], protectedPatterns: [...PROTECTED_PATTERNS], humanSummary, options, @@ -300,16 +276,19 @@ export function buildPreToolUseDenial({ return { message: render(humanSummary, structured), structured }; } - // out-of-scope (deny) - const alternatives = findAlternativeTasks(deniedPath, root, taskId); - const options = buildOutOfScopeOptions({ deniedPath, activeTaskId: taskId, alternatives }); + const activeTaskUris = (task && task.dkgTaskUris) || []; + const options = buildOutOfScopeOptions({ deniedPath, activeTaskUris }); const recommendedOptionId = recommendFor('out-of-scope', options); - const positives = ((task && task.allowed) || []).filter(p => !p.startsWith('!')); - const exemptions = ((task && task.exemptions) || []).filter(p => !p.startsWith('!')); + const positives = ((task && task.allowed) || []).filter((p) => !p.startsWith('!')); + const exemptions = ((task && task.exemptions) || []).filter((p) => !p.startsWith('!')); + const taskListLabel = activeTaskUris.length === 1 + ? `\`${activeTaskUris[0]}\`` + : activeTaskUris.length + ? `${activeTaskUris.length} in-progress tasks` + : 'no in-progress task'; const humanSummary = - `I'd like to edit \`${deniedPath}\`, but the active task ` + - `${taskId ? `\`${taskId}\`` : '(none)'}` + - `${task && task.description ? ` — ${task.description}` : ''}` + + `I'd like to edit \`${deniedPath}\`, but ${taskListLabel}` + + `${task && task.description ? ` (${task.description})` : ''}` + ` doesn't cover that file.`; const structured = { version: 1, @@ -318,12 +297,12 @@ export function buildPreToolUseDenial({ tool, deniedPath, activeTask: taskId || null, + activeTaskUris, activeTaskDescription: (task && task.description) || null, allowed: positives, exemptions, suggestedGlob: suggestGlob(deniedPath), suggestedTightGlob: suggestTightGlob(deniedPath), - alternativeTasks: alternatives, humanSummary, options, simpleOptions: buildSimpleOptions(options, recommendedOptionId), @@ -333,19 +312,17 @@ export function buildPreToolUseDenial({ return { message: render(humanSummary, structured), structured }; } -// Build a manifest-load-error denial message. -export function buildLoadErrorDenial({ taskId, error }) { - const options = buildLoadErrorOptions({ taskId, error }); - const recommendedOptionId = recommendFor('manifest-load-error', options); - const humanSummary = - `The active task manifest \`${taskId}\` won't load — ${error}. ` + - `I can't apply any scope check until it's fixed or cleared.`; +export function buildResolutionErrorDenial({ reason, diagnostic }) { + const options = buildResolutionErrorOptions({ reason }); + const recommendedOptionId = recommendFor(reason, options); + const humanSummary = reason === 'daemon-unreachable' + ? `I can't reach the local DKG daemon, so I can't check whether this edit is in scope. ${diagnostic || ''}`.trim() + : `The DKG project / agent isn't fully configured for this workspace, so I can't resolve scope. ${diagnostic || ''}`.trim(); const structured = { version: 1, hook: 'preToolUse', - reason: 'manifest-load-error', - activeTask: taskId, - error, + reason, + diagnostic: diagnostic || null, humanSummary, options, simpleOptions: buildSimpleOptions(options, recommendedOptionId), @@ -355,18 +332,23 @@ export function buildLoadErrorDenial({ taskId, error }) { return { message: render(humanSummary, structured), structured }; } -// Build a beforeShellExecution denial message from a set of violations. -// A violation is { sub, cmd, path, decision }. +// Back-compat alias retained so older hook bindings keep loading. Maps to +// the new resolution-error builder; pre-existing callers that pass +// `{ taskId, error }` get a sensible default. +export function buildLoadErrorDenial({ taskId, error } = {}) { + return buildResolutionErrorDenial({ + reason: 'configuration-error', + diagnostic: `Couldn't load active scope${taskId ? ` for task ${taskId}` : ''}: ${error || 'unknown error'}.`, + }); +} + export function buildShellPrecheckDenial({ command, violations, task, taskId, root, }) { - const anyProtected = violations.some(v => String(v.decision).startsWith('protected')); - // Use the first out-of-scope path (if any) to seed the menu; if everything - // is protected, show the protected menu. If mixed, protected wins because - // the user needs bootstrap before we can address scope fixes. + const anyProtected = violations.some((v) => String(v.decision).startsWith('protected')); let reason, options, suggestedFix; - const firstScopePath = violations.find(v => v.decision === 'deny')?.path || null; - const firstProtPath = violations.find(v => String(v.decision).startsWith('protected'))?.path || null; + const firstScopePath = violations.find((v) => v.decision === 'deny')?.path || null; + const firstProtPath = violations.find((v) => String(v.decision).startsWith('protected'))?.path || null; if (anyProtected) { reason = 'protected'; @@ -374,11 +356,11 @@ export function buildShellPrecheckDenial({ suggestedFix = 'enable bootstrap — see options'; } else if (firstScopePath) { reason = 'out-of-scope'; - const alternatives = findAlternativeTasks(firstScopePath, root, taskId); options = buildOutOfScopeOptions({ - deniedPath: firstScopePath, activeTaskId: taskId, alternatives, + deniedPath: firstScopePath, + activeTaskUris: (task && task.dkgTaskUris) || [], }); - suggestedFix = `add "${suggestGlob(firstScopePath)}" to ${taskId}'s manifest`; + suggestedFix = `file a new in-progress task covering "${suggestGlob(firstScopePath)}"`; } else { reason = 'unknown'; options = [ @@ -392,13 +374,18 @@ export function buildShellPrecheckDenial({ const recommendedOptionId = recommendFor(reason, options); const firstPath = firstProtPath || firstScopePath || '(target)'; const firstCmd = violations[0]?.cmd || 'command'; + const taskListLabel = (task?.dkgTaskUris?.length || 0) === 1 + ? `\`${task.dkgTaskUris[0]}\`` + : (task?.dkgTaskUris?.length || 0) > 1 + ? `${task.dkgTaskUris.length} in-progress tasks` + : 'no in-progress task'; const humanSummary = reason === 'protected' ? `The shell command I was about to run (\`${firstCmd}\` on \`${firstPath}\`) ` + `would touch a protected system file. Blocked before it ran.` : reason === 'out-of-scope' ? `The shell command I was about to run (\`${firstCmd}\` on \`${firstPath}\`) ` + - `would write outside the active task \`${taskId || '(none)'}\`. Blocked before it ran.` + `would write outside ${taskListLabel}. Blocked before it ran.` : `That shell command was blocked before it ran.`; const structured = { @@ -407,7 +394,8 @@ export function buildShellPrecheckDenial({ reason, command, activeTask: taskId || null, - violations: violations.map(v => ({ + activeTaskUris: (task && task.dkgTaskUris) || [], + violations: violations.map((v) => ({ cmd: v.cmd, path: v.path, decision: v.decision, })), suggestedFix, @@ -421,9 +409,6 @@ export function buildShellPrecheckDenial({ return { message: render(humanSummary, structured), structured }; } -// Build an afterShellExecution context message. Unlike the other two this -// isn't a deny — the shell already ran. Files were reverted/deleted. Still -// emit a plan-mode menu so the agent surfaces the "what now?" question. export function buildAfterShellContext({ command, task, taskId, root, reverted, deleted, unreverted, @@ -433,7 +418,7 @@ export function buildAfterShellContext({ unreverted = Array.isArray(unreverted) ? unreverted : []; const touched = [...reverted, ...deleted]; - const firstProtected = touched.find(p => { + const firstProtected = touched.find((p) => { for (const pat of PROTECTED_PATTERNS) { const re = new RegExp('^' + pat.replace(/[.+^${}()|[\]\\]/g, '\\$&').replace(/\*\*/g, '.*').replace(/\*/g, '[^/]*') + '$'); if (re.test(p)) return true; @@ -445,11 +430,11 @@ export function buildAfterShellContext({ if (firstProtected) { reason = 'protected'; options = buildProtectedOptions({ deniedPath: firstProtected }); - } else if (touched.length && taskId) { + } else if (touched.length) { reason = 'out-of-scope'; - const alternatives = findAlternativeTasks(touched[0], root, taskId); options = buildOutOfScopeOptions({ - deniedPath: touched[0], activeTaskId: taskId, alternatives, + deniedPath: touched[0], + activeTaskUris: (task && task.dkgTaskUris) || [], }); } else { reason = 'unknown'; @@ -462,6 +447,11 @@ export function buildAfterShellContext({ const recommendedOptionId = recommendFor(reason, options); const touchedCount = reverted.length + deleted.length; + const taskListLabel = (task?.dkgTaskUris?.length || 0) === 1 + ? `\`${task.dkgTaskUris[0]}\`` + : (task?.dkgTaskUris?.length || 0) > 1 + ? `${task.dkgTaskUris.length} in-progress tasks` + : 'no in-progress task'; const humanSummary = (() => { if (touchedCount === 0) { return `A shell command ran and finished cleanly — nothing needed to be reverted.`; @@ -474,7 +464,7 @@ export function buildAfterShellContext({ return `A shell command touched a protected system file, so I ${fix} to put things back.`; } if (reason === 'out-of-scope') { - return `A shell command touched files outside the active task \`${taskId}\`, so I ${fix} to put things back.`; + return `A shell command touched files outside ${taskListLabel}, so I ${fix} to put things back.`; } return `A shell command touched files it shouldn't have, so I ${fix}.`; })(); @@ -485,9 +475,10 @@ export function buildAfterShellContext({ reason, command, activeTask: taskId || null, + activeTaskUris: (task && task.dkgTaskUris) || [], reverted, deleted, - unreverted: unreverted.map(u => ({ path: u.path, status: u.status, reason: u.reason })), + unreverted: unreverted.map((u) => ({ path: u.path, status: u.status, reason: u.reason })), humanSummary, options, simpleOptions: buildSimpleOptions(options, recommendedOptionId), @@ -495,8 +486,6 @@ export function buildAfterShellContext({ agentReasoning: null, }; - // Prose stays minimal: the humanSummary + paths the agent may want to - // reference. No banners, no STOP, no agent-directed meta copy. const lines = [humanSummary]; if (reverted.length) { lines.push('', 'Reverted:'); diff --git a/agent-scope/lib/denial.test.mjs b/agent-scope/lib/denial.test.mjs index 9d32f6597..3d40049f0 100644 --- a/agent-scope/lib/denial.test.mjs +++ b/agent-scope/lib/denial.test.mjs @@ -1,50 +1,41 @@ -import test from 'node:test'; -import assert from 'node:assert/strict'; -import { mkdtempSync, writeFileSync, mkdirSync, rmSync } from 'node:fs'; -import { join } from 'node:path'; -import { tmpdir } from 'node:os'; +// Unit tests for denial.mjs. Verifies the prose+JSON shape every hook +// emits so the agent's plan-mode denial protocol stays stable. +// +// node --test agent-scope/lib/denial.test.mjs +import { test } from 'node:test'; +import assert from 'node:assert/strict'; import { - suggestGlob, suggestTightGlob, findAlternativeTasks, - buildOutOfScopeOptions, buildProtectedOptions, buildLoadErrorOptions, - buildPreToolUseDenial, buildLoadErrorDenial, - buildShellPrecheckDenial, buildAfterShellContext, - classifyProtected, DENIAL_FENCE_START, DENIAL_FENCE_END, + buildPreToolUseDenial, buildShellPrecheckDenial, buildAfterShellContext, + buildResolutionErrorDenial, buildLoadErrorDenial, + buildOutOfScopeOptions, buildProtectedOptions, buildResolutionErrorOptions, + classifyProtected, suggestGlob, suggestTightGlob, } from './denial.mjs'; -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -function makeTempRepo() { - const root = mkdtempSync(join(tmpdir(), 'as-denial-')); - mkdirSync(join(root, 'agent-scope/tasks'), { recursive: true }); - mkdirSync(join(root, 'agent-scope/lib'), { recursive: true }); - return root; -} -function writeTask(root, id, manifest) { - writeFileSync( - join(root, 'agent-scope/tasks', `${id}.json`), - JSON.stringify({ id, description: manifest.description || '', ...manifest }, null, 2) - ); -} -function cleanup(root) { rmSync(root, { recursive: true, force: true }); } - -function extractJson(message) { +function extractJSON(message) { const start = message.indexOf(DENIAL_FENCE_START); - const end = message.indexOf(DENIAL_FENCE_END); - assert.ok(start >= 0, 'message has begin fence'); - assert.ok(end > start, 'message has end fence'); + const end = message.indexOf(DENIAL_FENCE_END); + assert.ok(start >= 0 && end > start, `no fence found in: ${message.slice(0, 200)}`); const body = message.slice(start + DENIAL_FENCE_START.length, end).trim(); return JSON.parse(body); } -// --------------------------------------------------------------------------- -// suggestGlob -// --------------------------------------------------------------------------- +function inProgressTask(allowed = [], exemptions = [], uris = ['urn:dkg:task:demo']) { + return { + id: uris[0]?.split(':').pop() || 'demo', + dkgTaskUris: uris, + description: 'demo task', + allowed, + exemptions, + reason: 'ok', + tasks: uris.map((u) => ({ uri: u, title: 'demo' })), + }; +} + +// --- suggestGlob / suggestTightGlob -------------------------------------- -test('suggestGlob: typical nested file', () => { +test('suggestGlob: directory glob for nested file', () => { assert.equal(suggestGlob('packages/foo/src/bar.ts'), 'packages/foo/src/**'); }); @@ -52,620 +43,200 @@ test('suggestGlob: top-level file', () => { assert.equal(suggestGlob('README.md'), 'README.md'); }); -test('suggestGlob: empty / invalid', () => { - assert.equal(suggestGlob(''), null); - assert.equal(suggestGlob(undefined), null); - assert.equal(suggestGlob(null), null); - assert.equal(suggestGlob(42), null); +test('suggestGlob: invalid input', () => { + assert.equal(suggestGlob(null), null); + assert.equal(suggestGlob(''), null); }); -test('suggestGlob: trailing slash is stripped', () => { - assert.equal(suggestGlob('packages/foo/src/'), 'packages/foo/**'); -}); - -// --------------------------------------------------------------------------- -// suggestTightGlob -// --------------------------------------------------------------------------- - -test('suggestTightGlob: basename stem + sibling extensions', () => { +test('suggestTightGlob: stem* in same dir', () => { assert.equal(suggestTightGlob('packages/foo/src/bar.ts'), 'packages/foo/src/bar*'); }); -test('suggestTightGlob: multi-dot filename uses first-dot stem', () => { - assert.equal(suggestTightGlob('packages/foo/bar.test.ts'), 'packages/foo/bar*'); -}); - -test('suggestTightGlob: extensionless', () => { - assert.equal(suggestTightGlob('scripts/build'), 'scripts/build*'); -}); +// --- classifyProtected ---------------------------------------------------- -test('suggestTightGlob: dotfile keeps the full basename', () => { - // leading-dot filenames have no conventional "stem + ext" split; use as-is - assert.equal(suggestTightGlob('.env'), '.env*'); -}); - -test('suggestTightGlob: empty input returns null', () => { - assert.equal(suggestTightGlob(''), null); - assert.equal(suggestTightGlob(undefined), null); -}); - -// --------------------------------------------------------------------------- -// findAlternativeTasks -// --------------------------------------------------------------------------- - -test('findAlternativeTasks: finds a task that covers the path', () => { - const root = makeTempRepo(); - try { - writeTask(root, 'staking', { description: 'Staking work', allowed: ['packages/evm-module/contracts/**'] }); - writeTask(root, 'sync', { description: 'Sync work', allowed: ['packages/sync/**'] }); - const r = findAlternativeTasks('packages/evm-module/contracts/Stk.sol', root, 'sync'); - assert.equal(r.length, 1); - assert.equal(r[0].id, 'staking'); - assert.equal(r[0].description, 'Staking work'); - } finally { cleanup(root); } -}); - -test('findAlternativeTasks: excludes the current task', () => { - const root = makeTempRepo(); - try { - writeTask(root, 'wide', { allowed: ['**/*'] }); - const r = findAlternativeTasks('any/file.ts', root, 'wide'); - assert.equal(r.length, 0); - } finally { cleanup(root); } -}); - -test('findAlternativeTasks: returns [] when no manifests match', () => { - const root = makeTempRepo(); - try { - writeTask(root, 'narrow', { allowed: ['packages/only/**'] }); - const r = findAlternativeTasks('totally/unrelated/file.ts', root, null); - assert.equal(r.length, 0); - } finally { cleanup(root); } -}); - -test('findAlternativeTasks: skips broken manifests silently', () => { - const root = makeTempRepo(); - try { - writeTask(root, 'good', { allowed: ['**/*'] }); - writeFileSync(join(root, 'agent-scope/tasks/broken.json'), '{ not valid json'); - const r = findAlternativeTasks('x/y.ts', root, null); - assert.equal(r.length, 1); - assert.equal(r[0].id, 'good'); - } finally { cleanup(root); } -}); - -// --------------------------------------------------------------------------- -// buildOutOfScopeOptions -// --------------------------------------------------------------------------- - -test('buildOutOfScopeOptions: base menu has add_file, add_glob, skip, cancel, custom_instruction', () => { - const opts = buildOutOfScopeOptions({ - deniedPath: 'packages/foo/bar.ts', activeTaskId: 'my-task', alternatives: [], - }); - const ids = opts.map(o => o.id); - assert.ok(ids.includes('add_file')); - assert.ok(ids.includes('add_glob')); - assert.ok(ids.includes('skip')); - assert.ok(ids.includes('cancel')); - assert.ok(ids.includes('custom_instruction')); -}); - -test('buildOutOfScopeOptions: custom_instruction is the free-text fallback', () => { - const opts = buildOutOfScopeOptions({ - deniedPath: 'x/y.ts', activeTaskId: 't', alternatives: [], - }); - const custom = opts.find(o => o.id === 'custom_instruction'); - assert.ok(custom, 'custom option present'); - assert.equal(custom.action.kind, 'custom'); - assert.match(custom.label, /type/i); -}); - -test('buildOutOfScopeOptions: add_file action has the exact path', () => { - const opts = buildOutOfScopeOptions({ - deniedPath: 'packages/foo/bar.ts', activeTaskId: 'my-task', alternatives: [], - }); - const addFile = opts.find(o => o.id === 'add_file'); - assert.equal(addFile.action.kind, 'add_to_manifest'); - assert.equal(addFile.action.task, 'my-task'); - assert.deepEqual(addFile.action.patterns, ['packages/foo/bar.ts']); -}); - -test('buildOutOfScopeOptions: add_glob uses suggestGlob', () => { - const opts = buildOutOfScopeOptions({ - deniedPath: 'packages/foo/bar.ts', activeTaskId: 't', alternatives: [], - }); - const addGlob = opts.find(o => o.id === 'add_glob'); - assert.deepEqual(addGlob.action.patterns, ['packages/foo/**']); -}); - -test('buildOutOfScopeOptions: switch options are added per alternative (max 3)', () => { - const alternatives = [ - { id: 'a', description: 'A' }, - { id: 'b', description: 'B' }, - { id: 'c', description: 'C' }, - { id: 'd', description: 'D' }, - ]; - const opts = buildOutOfScopeOptions({ - deniedPath: 'x/y.ts', activeTaskId: 't', alternatives, - }); - const switchIds = opts.filter(o => o.id.startsWith('switch_task_')).map(o => o.id); - assert.equal(switchIds.length, 3); - assert.deepEqual(switchIds, ['switch_task_a', 'switch_task_b', 'switch_task_c']); -}); - -// --------------------------------------------------------------------------- -// buildProtectedOptions -// --------------------------------------------------------------------------- - -// --------------------------------------------------------------------------- -// classifyProtected — explains WHY a specific protected file is guarded -// --------------------------------------------------------------------------- - -test('classifyProtected: cursor hook', () => { +test('classifyProtected: known categories', () => { assert.equal(classifyProtected('.cursor/hooks/scope-guard.mjs').kind, 'cursor-hook'); - assert.equal(classifyProtected('.cursor/hooks.json').kind, 'cursor-hook'); -}); -test('classifyProtected: scope library / CLI / schema', () => { + assert.equal(classifyProtected('.cursor/rules/agent-scope.mdc').kind, 'cursor-rule'); + assert.equal(classifyProtected('.claude/hooks/scope-guard.mjs').kind, 'claude-hook'); assert.equal(classifyProtected('agent-scope/lib/scope.mjs').kind, 'scope-library'); - assert.equal(classifyProtected('agent-scope/bin/task.mjs').kind, 'scope-cli'); - assert.equal(classifyProtected('agent-scope/schema/task.schema.json').kind, 'scope-schema'); -}); -test('classifyProtected: manifests, active, token, rule', () => { - assert.equal(classifyProtected('agent-scope/tasks/sync.json').kind, 'task-manifest'); - assert.equal(classifyProtected('agent-scope/active').kind, 'active-pointer'); assert.equal(classifyProtected('agent-scope/.bootstrap-token').kind, 'bootstrap-token'); - assert.equal(classifyProtected('.cursor/rules/agent-scope.mdc').kind, 'cursor-rule'); -}); -test('classifyProtected: unknown input yields safe default', () => { - assert.equal(classifyProtected(null).kind, 'unknown'); - assert.equal(classifyProtected('').kind, 'unknown'); -}); - -test('buildProtectedOptions: bootstrap + cancel + skip + custom_instruction', () => { - const opts = buildProtectedOptions({ deniedPath: '.cursor/hooks/x.mjs' }); - assert.deepEqual( - opts.map(o => o.id), - ['bootstrap', 'cancel', 'skip', 'custom_instruction'], - ); - assert.equal(opts[0].action.kind, 'bootstrap'); - assert.ok(opts[0].action.instruction.includes('bootstrap-token')); - // Yes / No framing — `bootstrap` label leads with "Yes", `cancel`/`skip` with "No". - assert.ok(opts[0].label.startsWith('Yes'), 'bootstrap label should start with Yes'); - assert.ok(opts[1].label.startsWith('No'), 'cancel label should start with No'); - assert.ok(opts[2].label.startsWith('No'), 'skip label should start with No'); -}); - -// --------------------------------------------------------------------------- -// buildLoadErrorOptions -// --------------------------------------------------------------------------- - -test('buildLoadErrorOptions: fix, clear, cancel, custom_instruction', () => { - const opts = buildLoadErrorOptions({ taskId: 'broken', error: 'syntax' }); - assert.deepEqual( - opts.map(o => o.id), - ['fix_manifest', 'clear_task', 'cancel', 'custom_instruction'], - ); - assert.equal(opts[0].action.task, 'broken'); -}); - -// --------------------------------------------------------------------------- -// buildPreToolUseDenial -// --------------------------------------------------------------------------- - -test('buildPreToolUseDenial: protected → structured protected menu', () => { - const root = makeTempRepo(); - try { - const { message, structured } = buildPreToolUseDenial({ - tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', - decision: 'protected', task: null, taskId: null, root, - }); - const parsed = extractJson(message); - assert.equal(parsed.hook, 'preToolUse'); - assert.equal(parsed.reason, 'protected'); - assert.equal(parsed.deniedPath, '.cursor/hooks/x.mjs'); - assert.ok(parsed.protectedPatterns.length > 0); - assert.deepEqual( - parsed.options.map(o => o.id), - ['bootstrap', 'cancel', 'skip', 'custom_instruction'], - ); - assert.equal(parsed.recommendedOptionId, 'cancel'); - assert.equal(parsed.agentReasoning, null, 'agent fills this in when surfacing'); - assert.equal(structured.reason, 'protected'); - // Human summary is short, natural, contains the denied path, and is - // surfaced in the rendered prose so the agent can quote it verbatim. - assert.ok(typeof parsed.humanSummary === 'string'); - assert.ok(parsed.humanSummary.length > 0 && parsed.humanSummary.length < 400, - 'humanSummary stays concise'); - assert.ok(parsed.humanSummary.includes('.cursor/hooks/x.mjs')); - assert.ok(message.includes(parsed.humanSummary), - 'rendered prose includes the humanSummary verbatim'); - // No more ALL-CAPS banners or agent-directed meta copy in the prose. - assert.ok(!message.includes('PROTECTED PATH'), 'prose is banner-free'); - assert.ok(!message.includes('STOP'), 'prose is banner-free'); - assert.ok(!/surface the menu below/i.test(message), - 'prose has no "surface the menu" agent-directed copy'); - // Structured payload carries the classification so downstream tools can use it. - assert.equal(parsed.protectedKind, 'cursor-hook'); - assert.ok(typeof parsed.protectedRole === 'string' && parsed.protectedRole.length > 0); - } finally { cleanup(root); } + assert.equal(classifyProtected('AGENTS.md').kind, 'agent-instructions'); + assert.equal(classifyProtected('.cursorrules').kind, 'agent-instructions'); }); -test('buildPreToolUseDenial: out-of-scope → full metadata + alternatives', () => { - const root = makeTempRepo(); - try { - writeTask(root, 'staking', { description: 'stk', allowed: ['packages/evm-module/**'] }); - const task = { id: 'sync', description: 'Sync', - allowed: ['packages/sync/**'], exemptions: ['**/dist/**'] }; - const { message } = buildPreToolUseDenial({ - tool: 'StrReplace', deniedPath: 'packages/evm-module/contracts/S.sol', - decision: 'deny', task, taskId: 'sync', root, - }); - const p = extractJson(message); - assert.equal(p.reason, 'out-of-scope'); - assert.equal(p.deniedPath, 'packages/evm-module/contracts/S.sol'); - assert.equal(p.activeTask, 'sync'); - assert.deepEqual(p.allowed, ['packages/sync/**']); - assert.deepEqual(p.exemptions, ['**/dist/**']); - assert.equal(p.suggestedGlob, 'packages/evm-module/contracts/**'); - assert.equal(p.alternativeTasks.length, 1); - assert.equal(p.alternativeTasks[0].id, 'staking'); - const ids = p.options.map(o => o.id); - assert.ok(ids.includes('add_file')); - assert.ok(ids.includes('switch_task_staking')); - assert.ok(ids.includes('custom_instruction')); - assert.equal(p.recommendedOptionId, 'add_glob'); - assert.equal(p.agentReasoning, null); - // Human-sounding summary instead of the old ALL-CAPS banner. - assert.ok(!message.includes('OUT OF TASK SCOPE'), - 'prose no longer uses the ALL-CAPS banner'); - assert.ok(typeof p.humanSummary === 'string' && p.humanSummary.length < 400); - assert.ok(p.humanSummary.includes('packages/evm-module/contracts/S.sol')); - assert.ok(p.humanSummary.includes('sync'), 'summary mentions the active task'); - } finally { cleanup(root); } +test('classifyProtected: unknown path → unknown kind', () => { + assert.equal(classifyProtected('something/random').kind, 'unknown'); }); -test('buildPreToolUseDenial: message has both fences and is JSON-parseable', () => { - const root = makeTempRepo(); - try { - const { message } = buildPreToolUseDenial({ - tool: 'Write', deniedPath: '.cursor/hooks/y.mjs', - decision: 'protected', task: null, taskId: null, root, - }); - assert.ok(message.includes(DENIAL_FENCE_START)); - assert.ok(message.includes(DENIAL_FENCE_END)); - const p = extractJson(message); - assert.equal(p.version, 1); - } finally { cleanup(root); } -}); - -// --------------------------------------------------------------------------- -// buildLoadErrorDenial -// --------------------------------------------------------------------------- +// --- option menus --------------------------------------------------------- -test('buildLoadErrorDenial: structured with error + menu', () => { - const { message, structured } = buildLoadErrorDenial({ - taskId: 'my-task', error: 'Unexpected token', +test('buildOutOfScopeOptions: contains new_task_glob + new_task_file + skip + cancel + custom', () => { + const opts = buildOutOfScopeOptions({ + deniedPath: 'packages/foo/bar.ts', + activeTaskUris: ['urn:dkg:task:other'], }); - const p = extractJson(message); - assert.equal(p.hook, 'preToolUse'); - assert.equal(p.reason, 'manifest-load-error'); - assert.equal(p.activeTask, 'my-task'); - assert.equal(p.error, 'Unexpected token'); - assert.deepEqual( - p.options.map(o => o.id), - ['fix_manifest', 'clear_task', 'cancel', 'custom_instruction'], - ); - assert.equal(p.recommendedOptionId, 'fix_manifest'); - assert.equal(structured.error, 'Unexpected token'); -}); - -// --------------------------------------------------------------------------- -// buildShellPrecheckDenial -// --------------------------------------------------------------------------- - -test('buildShellPrecheckDenial: protected violation → protected menu', () => { - const root = makeTempRepo(); - try { - const task = null; - const violations = [ - { sub: 'rm -rf .cursor/hooks', cmd: 'rm', path: '.cursor/hooks', decision: 'protected (covers)' }, - ]; - const { message } = buildShellPrecheckDenial({ - command: 'rm -rf .cursor/hooks', violations, task, taskId: null, root, - }); - const p = extractJson(message); - assert.equal(p.hook, 'beforeShellExecution'); - assert.equal(p.reason, 'protected'); - assert.equal(p.command, 'rm -rf .cursor/hooks'); - assert.equal(p.violations.length, 1); - assert.deepEqual( - p.options.map(o => o.id), - ['bootstrap', 'cancel', 'skip', 'custom_instruction'], - ); - assert.equal(p.recommendedOptionId, 'cancel'); - } finally { cleanup(root); } + const ids = opts.map((o) => o.id); + assert.deepEqual(ids, ['new_task_glob', 'new_task_file', 'skip', 'cancel', 'custom_instruction']); + const tg = opts.find((o) => o.id === 'new_task_glob'); + assert.equal(tg.action.kind, 'new_in_progress_task'); + assert.deepEqual(tg.action.suggestedScopedToPath, ['packages/foo/**']); + const tf = opts.find((o) => o.id === 'new_task_file'); + assert.deepEqual(tf.action.suggestedScopedToPath, ['packages/foo/bar.ts']); +}); + +test('buildProtectedOptions: bootstrap is the recommendation; no add_to_manifest', () => { + const opts = buildProtectedOptions({ deniedPath: '.cursor/hooks.json' }); + const ids = opts.map((o) => o.id); + assert.deepEqual(ids, ['bootstrap', 'cancel', 'skip', 'custom_instruction']); + assert.equal(opts[0].action.kind, 'bootstrap'); + assert.match(opts[0].action.instruction, /agent-scope\/\.bootstrap-token/); }); -test('buildShellPrecheckDenial: pure out-of-scope → full menu', () => { - const root = makeTempRepo(); - try { - writeTask(root, 'other', { allowed: ['packages/evm-module/**'] }); - const task = { id: 'sync', allowed: ['packages/sync/**'] }; - const violations = [ - { sub: 'rm packages/evm-module/contracts/x.sol', cmd: 'rm', - path: 'packages/evm-module/contracts/x.sol', decision: 'deny' }, - ]; - const { message } = buildShellPrecheckDenial({ - command: 'rm packages/evm-module/contracts/x.sol', - violations, task, taskId: 'sync', root, - }); - const p = extractJson(message); - assert.equal(p.reason, 'out-of-scope'); - assert.equal(p.suggestedFix.includes('packages/evm-module/contracts/**'), true); - const ids = p.options.map(o => o.id); - assert.ok(ids.includes('add_file')); - assert.ok(ids.includes('switch_task_other')); - } finally { cleanup(root); } +test('buildResolutionErrorOptions: daemon-unreachable surfaces restart_daemon', () => { + const opts = buildResolutionErrorOptions({ reason: 'daemon-unreachable' }); + const ids = opts.map((o) => o.id); + assert.ok(ids.includes('restart_daemon')); + assert.match(opts[0].action.instruction, /dkg start/); }); -test('buildShellPrecheckDenial: mixed protected+out-of-scope → protected wins', () => { - const root = makeTempRepo(); - try { - const task = { id: 'x', allowed: ['only/**'] }; - const violations = [ - { sub: '1', cmd: 'rm', path: 'other/file.ts', decision: 'deny' }, - { sub: '2', cmd: 'rm', path: '.cursor/hooks/x.mjs', decision: 'protected' }, - ]; - const { message } = buildShellPrecheckDenial({ - command: '...', violations, task, taskId: 'x', root, - }); - const p = extractJson(message); - assert.equal(p.reason, 'protected'); - assert.deepEqual( - p.options.map(o => o.id), - ['bootstrap', 'cancel', 'skip', 'custom_instruction'], - ); - } finally { cleanup(root); } +test('buildResolutionErrorOptions: configuration-error surfaces configure_dkg', () => { + const opts = buildResolutionErrorOptions({ reason: 'configuration-error' }); + const ids = opts.map((o) => o.id); + assert.ok(ids.includes('configure_dkg')); }); -// --------------------------------------------------------------------------- -// buildAfterShellContext -// --------------------------------------------------------------------------- +// --- preToolUse: out-of-scope -------------------------------------------- -test('buildAfterShellContext: reverted + deleted in message', () => { - const root = makeTempRepo(); - try { - const { message } = buildAfterShellContext({ - command: 'whatever', task: { id: 'sync' }, taskId: 'sync', root, - reverted: ['packages/other/x.ts'], - deleted: ['.cursor/hooks/bad.mjs'], - unreverted: [], - }); - assert.ok(message.includes('Reverted:')); - assert.ok(message.includes('Deleted:')); - assert.ok(message.includes('packages/other/x.ts')); - assert.ok(message.includes('.cursor/hooks/bad.mjs')); - const p = extractJson(message); - assert.equal(p.hook, 'afterShellExecution'); - assert.equal(p.reason, 'protected'); // protected detected in deleted[] - assert.deepEqual(p.reverted, ['packages/other/x.ts']); - assert.deepEqual(p.deleted, ['.cursor/hooks/bad.mjs']); - } finally { cleanup(root); } +test('buildPreToolUseDenial: out-of-scope payload is well-formed', () => { + const t = inProgressTask(['src/**']); + const { message, structured } = buildPreToolUseDenial({ + tool: 'Write', deniedPath: 'packages/foo/bar.ts', decision: 'deny', + task: t, taskId: t.id, + }); + assert.match(message, /^agent-scope:/); + const j = extractJSON(message); + assert.deepEqual(j, structured); // message embeds the same payload + assert.equal(j.hook, 'preToolUse'); + assert.equal(j.reason, 'out-of-scope'); + assert.equal(j.tool, 'Write'); + assert.equal(j.deniedPath, 'packages/foo/bar.ts'); + assert.deepEqual(j.activeTaskUris, ['urn:dkg:task:demo']); + // simpleOptions = recommended + custom_instruction + assert.equal(j.simpleOptions.length, 2); + assert.equal(j.simpleOptions[0].id, j.recommendedOptionId); + assert.equal(j.simpleOptions[1].id, 'custom_instruction'); + assert.equal(j.recommendedOptionId, 'new_task_glob'); +}); + +test('buildPreToolUseDenial: humanSummary mentions the path and that no task covers it', () => { + const t = inProgressTask(['src/**']); + const { message } = buildPreToolUseDenial({ + tool: 'Write', deniedPath: 'packages/foo/bar.ts', decision: 'deny', + task: t, taskId: t.id, + }); + const j = extractJSON(message); + assert.match(j.humanSummary, /packages\/foo\/bar\.ts/); + assert.match(j.humanSummary, /doesn't cover/); }); -test('buildAfterShellContext: no protected → out-of-scope menu', () => { - const root = makeTempRepo(); - try { - const { message } = buildAfterShellContext({ - command: 'x', task: { id: 'sync' }, taskId: 'sync', root, - reverted: ['packages/other/x.ts'], - deleted: [], unreverted: [], - }); - const p = extractJson(message); - assert.equal(p.reason, 'out-of-scope'); - assert.ok(p.options.some(o => o.id === 'add_file')); - } finally { cleanup(root); } -}); +// --- preToolUse: protected ------------------------------------------------ -test('buildAfterShellContext: nothing touched → unknown menu', () => { - const root = makeTempRepo(); - try { - const { message } = buildAfterShellContext({ - command: 'x', task: null, taskId: null, root, - reverted: [], deleted: [], unreverted: [], - }); - const p = extractJson(message); - assert.equal(p.reason, 'unknown'); - assert.ok(p.options.some(o => o.id === 'acknowledge')); - } finally { cleanup(root); } +test('buildPreToolUseDenial: protected payload is well-formed', () => { + const { message, structured } = buildPreToolUseDenial({ + tool: 'Write', deniedPath: '.cursor/hooks.json', decision: 'protected', + task: null, taskId: null, + }); + const j = extractJSON(message); + assert.deepEqual(j, structured); + assert.equal(j.reason, 'protected'); + assert.equal(j.protectedKind, 'cursor-hook'); + assert.equal(j.simpleOptions[0].id, 'cancel'); // recommend safety + assert.equal(j.simpleOptions[1].id, 'custom_instruction'); + // Verbose options always include bootstrap as an explicit choice. + assert.ok(j.options.find((o) => o.id === 'bootstrap')); }); -// --------------------------------------------------------------------------- -// Structural invariants (all builders) -// --------------------------------------------------------------------------- +// --- resolution-error denial --------------------------------------------- -test('every builder emits version:1 and well-formed options', () => { - const root = makeTempRepo(); - try { - const cases = [ - buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', - task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), - buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', - decision: 'protected', task: null, taskId: null, root }), - buildLoadErrorDenial({ taskId: 't', error: 'bad' }), - buildShellPrecheckDenial({ command: 'rm x', - violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], - task: { id: 't' }, taskId: 't', root }), - buildAfterShellContext({ command: 'x', - task: { id: 't' }, taskId: 't', root, - reverted: ['a.ts'], deleted: [], unreverted: [] }), - ]; - for (const { message, structured } of cases) { - const p = extractJson(message); - assert.equal(p.version, 1); - assert.ok(Array.isArray(p.options)); - assert.ok(p.options.length >= 2); - for (const opt of p.options) { - assert.ok(typeof opt.id === 'string' && opt.id.length > 0); - assert.ok(typeof opt.label === 'string' && opt.label.length > 0); - assert.ok(opt.action && typeof opt.action.kind === 'string'); - } - assert.equal(structured.version, 1); - } - } finally { cleanup(root); } +test('buildResolutionErrorDenial: daemon-unreachable', () => { + const { message } = buildResolutionErrorDenial({ + reason: 'daemon-unreachable', diagnostic: 'connection refused', + }); + const j = extractJSON(message); + assert.equal(j.reason, 'daemon-unreachable'); + assert.equal(j.simpleOptions[0].id, 'restart_daemon'); + assert.match(j.humanSummary, /daemon/i); }); -test('every denial builder sets recommendedOptionId to a valid option', () => { - const root = makeTempRepo(); - try { - const cases = [ - buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', - task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), - buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', - decision: 'protected', task: null, taskId: null, root }), - buildLoadErrorDenial({ taskId: 't', error: 'bad' }), - buildShellPrecheckDenial({ command: 'rm x', - violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], - task: { id: 't' }, taskId: 't', root }), - buildAfterShellContext({ command: 'x', - task: { id: 't' }, taskId: 't', root, - reverted: ['a.ts'], deleted: [], unreverted: [] }), - ]; - for (const { message } of cases) { - const p = extractJson(message); - assert.ok( - typeof p.recommendedOptionId === 'string' && p.recommendedOptionId.length, - 'recommendedOptionId is a non-empty string', - ); - const ids = p.options.map(o => o.id); - assert.ok( - ids.includes(p.recommendedOptionId), - `recommended "${p.recommendedOptionId}" must be in the options list`, - ); - assert.equal(p.agentReasoning, null, - 'agentReasoning is a null placeholder the agent fills in via AskQuestion prompt'); - } - } finally { cleanup(root); } +test('buildLoadErrorDenial: legacy alias maps to configuration-error', () => { + const { message } = buildLoadErrorDenial({ taskId: 'demo', error: 'boom' }); + const j = extractJSON(message); + assert.equal(j.reason, 'configuration-error'); }); -// --------------------------------------------------------------------------- -// simpleOptions — the two-option plan-mode surface -// --------------------------------------------------------------------------- - -test('simpleOptions: exactly two entries (recommended + custom) on every builder', () => { - const root = makeTempRepo(); - try { - const cases = [ - buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', - task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), - buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', - decision: 'protected', task: null, taskId: null, root }), - buildLoadErrorDenial({ taskId: 't', error: 'bad' }), - buildShellPrecheckDenial({ command: 'rm x', - violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], - task: { id: 't' }, taskId: 't', root }), - buildAfterShellContext({ command: 'x', - task: { id: 't' }, taskId: 't', root, - reverted: ['a.ts'], deleted: [], unreverted: [] }), - ]; - for (const { message } of cases) { - const p = extractJson(message); - assert.ok(Array.isArray(p.simpleOptions), 'simpleOptions is an array'); - assert.equal(p.simpleOptions.length, 2, - 'simpleOptions always has exactly two entries (recommended + custom)'); - const [rec, custom] = p.simpleOptions; - assert.equal(rec.id, p.recommendedOptionId, - 'first simple option matches recommendedOptionId'); - assert.equal(custom.id, 'custom_instruction', - 'second simple option is the custom free-text fallback'); - assert.equal(custom.action.kind, 'custom'); - for (const opt of p.simpleOptions) { - assert.ok(typeof opt.id === 'string' && opt.id.length); - assert.ok(typeof opt.label === 'string' && opt.label.length); - assert.ok(opt.action && typeof opt.action.kind === 'string'); - } - } - } finally { cleanup(root); } -}); - -test('simpleOptions: recommended labels are short and natural', () => { - const root = makeTempRepo(); - try { - // out-of-scope → recommended is add_glob → "Add this folder..." - const { message: m1 } = buildPreToolUseDenial({ tool: 'Write', - deniedPath: 'packages/foo/bar.ts', decision: 'deny', - task: { id: 't', allowed: ['other/**'] }, taskId: 't', root }); - const p1 = extractJson(m1); - assert.equal(p1.simpleOptions[0].label, 'Add this folder to the task and try again'); - - // protected → recommended is cancel → "Skip it" - const { message: m2 } = buildPreToolUseDenial({ tool: 'Write', - deniedPath: '.cursor/hooks/x.mjs', decision: 'protected', - task: null, taskId: null, root }); - const p2 = extractJson(m2); - assert.equal(p2.simpleOptions[0].label, 'Skip it'); +// --- shell-precheck ------------------------------------------------------- - // custom label is the natural one too - assert.equal(p2.simpleOptions[1].label, 'Type what you want instead'); - } finally { cleanup(root); } +test('buildShellPrecheckDenial: protected violation', () => { + const { message } = buildShellPrecheckDenial({ + command: 'rm -rf .cursor/hooks', + violations: [{ cmd: 'rm', path: '.cursor/hooks', decision: 'protected (covers)' }], + task: null, taskId: null, + }); + const j = extractJSON(message); + assert.equal(j.reason, 'protected'); + assert.equal(j.simpleOptions[0].id, 'cancel'); + assert.match(j.humanSummary, /Blocked/); +}); + +test('buildShellPrecheckDenial: out-of-scope violation', () => { + const t = inProgressTask(['src/**']); + const { message } = buildShellPrecheckDenial({ + command: 'rm packages/foo/bar.ts', + violations: [{ cmd: 'rm', path: 'packages/foo/bar.ts', decision: 'deny' }], + task: t, taskId: t.id, + }); + const j = extractJSON(message); + assert.equal(j.reason, 'out-of-scope'); + assert.equal(j.simpleOptions[0].id, 'new_task_glob'); }); -// --------------------------------------------------------------------------- -// humanSummary — short, natural, quotable by the agent -// --------------------------------------------------------------------------- +// --- afterShell ----------------------------------------------------------- -test('humanSummary: present, short, no banners, no agent-directed meta copy', () => { - const root = makeTempRepo(); - try { - const cases = [ - buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', - task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), - buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', - decision: 'protected', task: null, taskId: null, root }), - buildLoadErrorDenial({ taskId: 't', error: 'bad' }), - buildShellPrecheckDenial({ command: 'rm x', - violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], - task: { id: 't' }, taskId: 't', root }), - buildAfterShellContext({ command: 'x', - task: { id: 't' }, taskId: 't', root, - reverted: ['a.ts'], deleted: [], unreverted: [] }), - ]; - for (const { message } of cases) { - const p = extractJson(message); - assert.ok(typeof p.humanSummary === 'string' && p.humanSummary.length > 0); - assert.ok(p.humanSummary.length <= 400, - `humanSummary is concise (<= 400 chars): "${p.humanSummary}"`); - // No ALL-CAPS banners. - assert.ok(!/PROTECTED PATH|OUT OF TASK SCOPE|STOP\b/.test(p.humanSummary), - 'humanSummary has no ALL-CAPS banners'); - // No agent-directed meta copy. - assert.ok(!/surface .* menu|via AskQuestion/i.test(p.humanSummary), - 'humanSummary is not agent-directed meta copy'); - } - } finally { cleanup(root); } +test('buildAfterShellContext: reverted + deleted summary', () => { + const t = inProgressTask(['src/**']); + const { message } = buildAfterShellContext({ + command: 'noop', + task: t, taskId: t.id, + reverted: ['packages/foo/bar.ts'], + deleted: ['packages/foo/junk.ts'], + unreverted: [], + }); + const j = extractJSON(message); + assert.equal(j.hook, 'afterShellExecution'); + assert.equal(j.reason, 'out-of-scope'); + assert.deepEqual(j.reverted, ['packages/foo/bar.ts']); + assert.deepEqual(j.deleted, ['packages/foo/junk.ts']); + assert.match(j.humanSummary, /reverted 1 file/); + assert.match(j.humanSummary, /deleted 1 new file/); +}); + +test('buildAfterShellContext: nothing touched → benign summary', () => { + const { message } = buildAfterShellContext({ + command: 'noop', task: null, taskId: null, + reverted: [], deleted: [], unreverted: [], + }); + const j = extractJSON(message); + assert.equal(j.reason, 'unknown'); + assert.match(j.humanSummary, /finished cleanly/); }); -test('custom_instruction option appears in every denial menu', () => { - const root = makeTempRepo(); - try { - const cases = [ - buildPreToolUseDenial({ tool: 'Write', deniedPath: 'a/b.ts', decision: 'deny', - task: { id: 't', allowed: ['c/**'] }, taskId: 't', root }), - buildPreToolUseDenial({ tool: 'Write', deniedPath: '.cursor/hooks/x.mjs', - decision: 'protected', task: null, taskId: null, root }), - buildLoadErrorDenial({ taskId: 't', error: 'bad' }), - buildShellPrecheckDenial({ command: 'rm x', - violations: [{ cmd: 'rm', path: 'x', decision: 'deny' }], - task: { id: 't' }, taskId: 't', root }), - buildAfterShellContext({ command: 'x', - task: { id: 't' }, taskId: 't', root, - reverted: ['a.ts'], deleted: [], unreverted: [] }), - ]; - for (const { message } of cases) { - const p = extractJson(message); - const custom = p.options.find(o => o.id === 'custom_instruction'); - assert.ok(custom, 'custom_instruction present in every denial menu'); - assert.equal(custom.action.kind, 'custom'); - } - } finally { cleanup(root); } +test('buildAfterShellContext: protected file touched → protected reason', () => { + const { message } = buildAfterShellContext({ + command: 'echo hi', task: null, taskId: null, + reverted: [], deleted: ['.cursor/hooks/scope-guard.mjs'], + unreverted: [], + }); + const j = extractJSON(message); + assert.equal(j.reason, 'protected'); + assert.equal(j.simpleOptions[0].id, 'cancel'); }); diff --git a/agent-scope/lib/dkg-source.mjs b/agent-scope/lib/dkg-source.mjs new file mode 100644 index 000000000..0cbbfbfb2 --- /dev/null +++ b/agent-scope/lib/dkg-source.mjs @@ -0,0 +1,383 @@ +// agent-scope/lib/dkg-source.mjs +// +// Resolves the agent's "active scope" from the local DKG daemon, replacing +// the legacy file-based flow (`agent-scope/active` + `agent-scope/tasks/*.json`). +// +// Source of truth: `tasks:Task` entities authored by the agent on this +// project's `tasks` sub-graph. A task is *active* (and therefore contributes +// its `tasks:scopedToPath` globs to the live allow-list) when: +// +// 1. Its current `tasks:status` is `"in_progress"`. (See note on +// replace-semantics below.) +// 2. It is attributed to THIS agent's URI via `prov:wasAttributedTo`. +// +// Multiple in_progress tasks attributed to the same agent → the union of +// their `tasks:scopedToPath` globs forms the live scope. Zero in_progress +// tasks → no active scope (the legacy "no task = anything goes (except +// protected)" default applies). +// +// Replace semantics: `dkg_add_task` and `dkg_update_task_status` both write +// `tasks:status` into a dedicated assertion (`task-status-`) that +// is `discardAssertion`'d before each write. So the daemon's main /query +// endpoint sees exactly one `tasks:status` triple per task at any given +// moment — no need for the SPARQL query itself to disambiguate by timestamp. +// (See packages/mcp-dkg/src/tools/writes.ts for the matching write code.) +// +// Cache: hooks fire many times per session and a SPARQL round-trip costs +// ~30–80ms; we cache the resolved scope for CACHE_TTL_MS in +// ~/.cache/agent-scope/scope--.json. Cache is keyed +// off both project and agent so multi-project / multi-operator setups don't +// cross-pollinate. + +import { readFileSync, mkdirSync, writeFileSync, existsSync, statSync } from 'node:fs'; +import { resolve, dirname, isAbsolute } from 'node:path'; +import os from 'node:os'; +import path from 'node:path'; + +const DEFAULT_API = 'http://localhost:9200'; +const CACHE_TTL_MS = 5_000; +const QUERY_TIMEOUT_MS = 4_000; +const CACHE_DIR = path.join(os.homedir(), '.cache', 'agent-scope'); + +// --------------------------------------------------------------------------- +// .dkg/config.yaml loader (slim, hook-friendly — no deps) +// --------------------------------------------------------------------------- +// +// Walks upward from `start` looking for `.dkg/config.yaml`. Same shape as +// the canonical TS loader in `packages/mcp-dkg/src/config.ts` but inlined +// here so agent-scope stays a zero-runtime-dep library that works from any +// hook context. + +function findConfigFile(start) { + let dir = path.resolve(start); + const root = path.parse(dir).root; + for (let i = 0; i < 64; i++) { + const candidate = path.join(dir, '.dkg', 'config.yaml'); + if (existsSync(candidate)) return candidate; + if (dir === root) return null; + const parent = path.dirname(dir); + if (parent === dir) return null; + dir = parent; + } + return null; +} + +// Hand-rolled subset of YAML good enough for `.dkg/config.yaml`. Mirrors +// the parser in `packages/mcp-dkg/hooks/capture-chat.mjs` so behaviour stays +// consistent. Two-space indented mapping, scalar leaves, optional quotes. +export function parseDotDkgConfig(yamlText) { + const lines = String(yamlText || '').split(/\r?\n/); + const cfg = { node: {}, agent: {}, capture: {} }; + const stack = [cfg]; + const indents = [-1]; + for (const rawLine of lines) { + const line = rawLine.replace(/#.*$/, '').replace(/\s+$/, ''); + if (!line.trim()) continue; + const indent = line.match(/^ */)[0].length; + while (indents.length > 1 && indent <= indents[indents.length - 1]) { + stack.pop(); + indents.pop(); + } + const m = line.trim().match(/^([A-Za-z_][A-Za-z0-9_]*)\s*:\s*(.*)$/); + if (!m) continue; + const key = m[1]; + const valRaw = m[2]; + const parent = stack[stack.length - 1]; + if (valRaw === '' || valRaw === undefined) { + parent[key] = {}; + stack.push(parent[key]); + indents.push(indent); + } else { + const val = valRaw.replace(/^["']|["']$/g, '').trim(); + if (val === 'true') parent[key] = true; + else if (val === 'false') parent[key] = false; + else if (/^-?\d+$/.test(val)) parent[key] = parseInt(val, 10); + else parent[key] = val; + } + } + return cfg; +} + +function expandHome(p) { + if (!p) return p; + if (p === '~') return os.homedir(); + if (p.startsWith('~/')) return path.join(os.homedir(), p.slice(2)); + return p; +} + +function readTokenFile(filePath, configDir) { + try { + const expanded = expandHome(filePath); + const abs = isAbsolute(expanded) ? expanded : resolve(configDir, expanded); + const raw = readFileSync(abs, 'utf8'); + const line = raw.split('\n').find((l) => l.trim() && !l.startsWith('#')); + return line ? line.trim() : null; + } catch { + return null; + } +} + +export function loadDkgWorkspaceConfig(repoRoot) { + const cwd = repoRoot || process.cwd(); + const cfgPath = findConfigFile(cwd); + let fromFile = { node: {}, agent: {}, capture: {} }; + if (cfgPath) { + try { + fromFile = parseDotDkgConfig(readFileSync(cfgPath, 'utf8')); + } catch { + /* malformed yaml — fall through to env */ + } + } + const envApi = process.env.DKG_API ?? process.env.DEVNET_API; + const envToken = process.env.DKG_TOKEN ?? process.env.DEVNET_TOKEN ?? process.env.DKG_AUTH; + const envProject = process.env.DKG_PROJECT; + const envAgent = process.env.DKG_AGENT_URI; + + // Token resolution: literal `node.token` wins, then `node.tokenFile`, then env. + let token = fromFile.node?.token || ''; + if (!token && fromFile.node?.tokenFile && cfgPath) { + token = readTokenFile(fromFile.node.tokenFile, dirname(cfgPath)) || ''; + } + if (!token) token = envToken || ''; + + // File wins over env for project/api/agent (matches the TS loader's policy). + return { + api: fromFile.node?.api || envApi || DEFAULT_API, + token, + projectId: fromFile.contextGraph || fromFile.project || envProject || null, + agentUri: fromFile.agent?.uri || envAgent || null, + sourcePath: cfgPath, + }; +} + +// --------------------------------------------------------------------------- +// SPARQL query +// --------------------------------------------------------------------------- + +const SCOPE_QUERY = ` +PREFIX tasks: +PREFIX prov: +PREFIX dcterms: +PREFIX rdfs: + +SELECT ?task ?title ?modified ?scope WHERE { + ?task a tasks:Task ; + tasks:status "in_progress" ; + prov:wasAttributedTo ?AGENT . + OPTIONAL { ?task rdfs:label ?title } + OPTIONAL { ?task dcterms:modified ?modified } + OPTIONAL { ?task tasks:scopedToPath ?scope } +} +`; + +function bindingValue(cell) { + if (cell == null) return ''; + if (typeof cell === 'string') return cell; + return cell.value ?? ''; +} + +async function fetchWithTimeout(url, opts, timeoutMs) { + const controller = new AbortController(); + const t = setTimeout(() => controller.abort(), timeoutMs); + try { + return await fetch(url, { ...opts, signal: controller.signal }); + } finally { + clearTimeout(t); + } +} + +async function querySparqlForActiveTasks({ api, token, projectId, agentUri }) { + const body = { + sparql: SCOPE_QUERY.replace(/\?AGENT/g, `<${agentUri}>`), + contextGraphId: projectId, + includeSharedMemory: true, + }; + const headers = { 'Content-Type': 'application/json' }; + if (token) headers.Authorization = `Bearer ${token}`; + const res = await fetchWithTimeout( + `${api.replace(/\/$/, '')}/api/query`, + { method: 'POST', headers, body: JSON.stringify(body) }, + QUERY_TIMEOUT_MS, + ); + if (!res.ok) { + throw new Error(`daemon ${api} → HTTP ${res.status} ${res.statusText}`); + } + const json = await res.json(); + const bindings = json?.result?.bindings ?? []; + + // Aggregate per-task: SPARQL returns one row per (task, scope) so we + // group on task URI and collect the scope list. + const byTask = new Map(); + for (const b of bindings) { + const uri = bindingValue(b.task); + if (!uri) continue; + if (!byTask.has(uri)) { + byTask.set(uri, { + uri, + title: bindingValue(b.title) || uri, + modified: bindingValue(b.modified) || null, + scopedToPath: [], + }); + } + const entry = byTask.get(uri); + const scope = bindingValue(b.scope); + if (scope && !entry.scopedToPath.includes(scope)) entry.scopedToPath.push(scope); + } + return Array.from(byTask.values()); +} + +// --------------------------------------------------------------------------- +// Cache +// --------------------------------------------------------------------------- + +function cachePathFor(projectId, agentUri) { + // Mangle both into filesystem-safe suffixes; cap length so absurdly long + // URIs don't break filename limits. + const proj = String(projectId || '').replace(/[^A-Za-z0-9._-]+/g, '_').slice(0, 80); + const agent = String(agentUri || '').replace(/[^A-Za-z0-9._-]+/g, '_').slice(0, 80); + return path.join(CACHE_DIR, `scope-${proj}-${agent}.json`); +} + +function readCache(filePath) { + try { + const stat = statSync(filePath); + if (Date.now() - stat.mtimeMs > CACHE_TTL_MS) return null; + const raw = readFileSync(filePath, 'utf8'); + return JSON.parse(raw); + } catch { + return null; + } +} + +function writeCache(filePath, payload) { + try { + mkdirSync(dirname(filePath), { recursive: true }); + writeFileSync(filePath, JSON.stringify(payload), 'utf8'); + } catch { + /* cache write is best-effort; never fail the hook */ + } +} + +// --------------------------------------------------------------------------- +// Public surface +// --------------------------------------------------------------------------- + +/** + * Resolve the active agent-scope from the DKG. + * + * Returns: + * { + * agentUri, projectId, + * tasks: [{ uri, title, modified, scopedToPath: [..globs] }], + * allowed: [..unioned positive globs], + * exemptions: [..unioned bang-prefixed globs], + * reason: 'ok' | 'no-config' | 'no-agent' | 'no-project' | + * 'daemon-unreachable' | 'no-active-task', + * diagnostic?: string, + * fromCache: boolean, + * } + * + * `reason` is what the hook should surface to the user when scope + * resolution didn't yield active tasks. The hook NEVER throws — fail-open + * for daemon unreachable (treated as no active task) so a daemon outage + * doesn't bleed into the agent's tooling. + */ +export async function resolveDkgScope({ root, force = false } = {}) { + const cfg = loadDkgWorkspaceConfig(root); + if (!cfg.sourcePath && !cfg.projectId && !cfg.agentUri) { + return makeEmpty(cfg, 'no-config', 'No `.dkg/config.yaml` found in the workspace and no DKG_PROJECT/DKG_AGENT_URI in env. agent-scope guard is in soft mode (only protected paths blocked).'); + } + if (!cfg.projectId) { + return makeEmpty(cfg, 'no-project', 'No `contextGraph:` pinned in `.dkg/config.yaml` (and no DKG_PROJECT in env). agent-scope guard is in soft mode (only protected paths blocked).'); + } + if (!cfg.agentUri) { + return makeEmpty(cfg, 'no-agent', 'No `agent.uri` configured in `.dkg/config.yaml` (and no DKG_AGENT_URI in env). agent-scope guard is in soft mode (only protected paths blocked).'); + } + + const cacheFile = cachePathFor(cfg.projectId, cfg.agentUri); + if (!force) { + const cached = readCache(cacheFile); + if (cached) return { ...cached, fromCache: true }; + } + + let tasks; + try { + tasks = await querySparqlForActiveTasks(cfg); + } catch (err) { + return makeEmpty(cfg, 'daemon-unreachable', + `DKG daemon unreachable at ${cfg.api}: ${err?.message || err}. agent-scope guard is in soft mode (only protected paths blocked) until the daemon is back.`); + } + + if (!tasks.length) { + const empty = makeEmpty(cfg, 'no-active-task', + `No \`tasks:Task\` with status "in_progress" attributed to \`${cfg.agentUri}\` on project \`${cfg.projectId}\`. Create one with \`dkg_add_task\` (status: "in_progress", scopedToPath: [...]) when you start work.`); + writeCache(cacheFile, empty); + return empty; + } + + const allowed = []; + const exemptions = []; + for (const t of tasks) { + for (const g of t.scopedToPath) { + if (typeof g !== 'string' || !g) continue; + if (g.startsWith('!')) { + if (!exemptions.includes(g)) exemptions.push(g); + } else { + if (!allowed.includes(g)) allowed.push(g); + } + } + } + + const result = { + agentUri: cfg.agentUri, + projectId: cfg.projectId, + tasks, + allowed, + exemptions, + reason: 'ok', + fromCache: false, + }; + writeCache(cacheFile, result); + return result; +} + +function makeEmpty(cfg, reason, diagnostic) { + return { + agentUri: cfg.agentUri || null, + projectId: cfg.projectId || null, + tasks: [], + allowed: [], + exemptions: [], + reason, + diagnostic, + fromCache: false, + }; +} + +/** + * Synchronous, sync-IO-only variant the hook can call when async/await + * would be inconvenient (e.g. shell-precheck reads stdin synchronously). + * Reads cache only — never queries the daemon. Falls through with `reason: + * "stale"` if the cache is missing or expired so the caller can decide + * whether to async-refresh or fail open. + */ +export function readCachedScopeSync({ root } = {}) { + const cfg = loadDkgWorkspaceConfig(root); + if (!cfg.projectId || !cfg.agentUri) { + return { ...makeEmpty(cfg, 'no-config', 'no project / agent configured'), fromCache: false, stale: false }; + } + const cacheFile = cachePathFor(cfg.projectId, cfg.agentUri); + const cached = readCache(cacheFile); + if (cached) return { ...cached, fromCache: true, stale: false }; + return { ...makeEmpty(cfg, 'no-active-task', 'cache miss / expired; resolve async first'), fromCache: false, stale: true }; +} + +/** + * Build a human-readable summary line for diagnostics / logs. + */ +export function describeScope(scope) { + if (!scope) return 'agent-scope: '; + if (scope.reason !== 'ok') return `agent-scope: ${scope.reason}${scope.diagnostic ? ` — ${scope.diagnostic}` : ''}`; + const tnames = scope.tasks.map((t) => t.title || t.uri.split(':').pop()).join(', '); + return `agent-scope: ${scope.tasks.length} active task${scope.tasks.length === 1 ? '' : 's'} (${tnames}) → ${scope.allowed.length} allow + ${scope.exemptions.length} deny globs`; +} diff --git a/agent-scope/lib/dkg-source.test.mjs b/agent-scope/lib/dkg-source.test.mjs new file mode 100644 index 000000000..50cfe4b83 --- /dev/null +++ b/agent-scope/lib/dkg-source.test.mjs @@ -0,0 +1,203 @@ +// Unit tests for dkg-source.mjs. Pure-function tests + a couple of +// no-network end-to-end checks (config loader, soft-mode fallthroughs). +// Run with: +// node --test agent-scope/lib/dkg-source.test.mjs + +import { test } from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { + parseDotDkgConfig, loadDkgWorkspaceConfig, describeScope, resolveDkgScope, +} from './dkg-source.mjs'; + +function makeWorkspace({ projectId, agentUri, api, token, tokenFile } = {}) { + const root = mkdtempSync(join(tmpdir(), 'dkg-source-test-')); + mkdirSync(join(root, '.dkg'), { recursive: true }); + const lines = []; + if (api) lines.push(`node:`, ` api: "${api}"`); + if (token) { + if (!api) lines.push('node:'); + lines.push(` token: "${token}"`); + } + if (tokenFile) { + if (!api && !token) lines.push('node:'); + lines.push(` tokenFile: "${tokenFile}"`); + } + if (projectId) lines.push(`contextGraph: "${projectId}"`); + if (agentUri) lines.push(`agent:`, ` uri: "${agentUri}"`); + writeFileSync(join(root, '.dkg', 'config.yaml'), lines.join('\n') + '\n'); + return root; +} + +// --- parseDotDkgConfig ---------------------------------------------------- + +test('parseDotDkgConfig: simple top-level scalars', () => { + const c = parseDotDkgConfig('contextGraph: "urn:proj:demo"\nproject: ignored\n'); + assert.equal(c.contextGraph, 'urn:proj:demo'); + assert.equal(c.project, 'ignored'); +}); + +test('parseDotDkgConfig: nested two-space mapping', () => { + const c = parseDotDkgConfig([ + 'node:', + ' api: "http://localhost:9200"', + ' token: "abc"', + 'agent:', + ' uri: "urn:agent:demo"', + ].join('\n')); + assert.equal(c.node.api, 'http://localhost:9200'); + assert.equal(c.node.token, 'abc'); + assert.equal(c.agent.uri, 'urn:agent:demo'); +}); + +test('parseDotDkgConfig: comments and blank lines ignored', () => { + const c = parseDotDkgConfig([ + '# top comment', + 'contextGraph: "p" # trailing comment', + '', + 'agent:', + ' uri: "u"', + ].join('\n')); + assert.equal(c.contextGraph, 'p'); + assert.equal(c.agent.uri, 'u'); +}); + +test('parseDotDkgConfig: integer + boolean coercion', () => { + const c = parseDotDkgConfig('node:\n port: 9200\n tls: true\n'); + assert.equal(c.node.port, 9200); + assert.equal(c.node.tls, true); +}); + +test('parseDotDkgConfig: malformed input returns shape with empty groups', () => { + const c = parseDotDkgConfig('not yaml at all'); + assert.deepEqual(c, { node: {}, agent: {}, capture: {} }); +}); + +// --- loadDkgWorkspaceConfig ----------------------------------------------- + +test('loadDkgWorkspaceConfig: reads YAML when present', () => { + const root = makeWorkspace({ + projectId: 'urn:proj:demo', + agentUri: 'urn:agent:demo', + api: 'http://localhost:9999', + token: 'tok', + }); + try { + const cfg = loadDkgWorkspaceConfig(root); + assert.equal(cfg.api, 'http://localhost:9999'); + assert.equal(cfg.token, 'tok'); + assert.equal(cfg.projectId, 'urn:proj:demo'); + assert.equal(cfg.agentUri, 'urn:agent:demo'); + assert.match(cfg.sourcePath || '', /config\.yaml$/); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('loadDkgWorkspaceConfig: env fallback when no YAML', () => { + const root = mkdtempSync(join(tmpdir(), 'dkg-source-test-')); + const prev = { + p: process.env.DKG_PROJECT, a: process.env.DKG_AGENT_URI, + api: process.env.DKG_API, tok: process.env.DKG_TOKEN, + }; + try { + process.env.DKG_PROJECT = 'urn:env:p'; + process.env.DKG_AGENT_URI = 'urn:env:a'; + process.env.DKG_API = 'http://localhost:1234'; + process.env.DKG_TOKEN = 'env-token'; + const cfg = loadDkgWorkspaceConfig(root); + assert.equal(cfg.projectId, 'urn:env:p'); + assert.equal(cfg.agentUri, 'urn:env:a'); + assert.equal(cfg.api, 'http://localhost:1234'); + assert.equal(cfg.token, 'env-token'); + assert.equal(cfg.sourcePath, null); + } finally { + Object.entries(prev).forEach(([k, v]) => { + const envKey = { p: 'DKG_PROJECT', a: 'DKG_AGENT_URI', api: 'DKG_API', tok: 'DKG_TOKEN' }[k]; + if (v === undefined) delete process.env[envKey]; else process.env[envKey] = v; + }); + rmSync(root, { recursive: true, force: true }); + } +}); + +test('loadDkgWorkspaceConfig: tokenFile is read when token is empty', () => { + const root = makeWorkspace({ + projectId: 'p', agentUri: 'a', tokenFile: './secret.txt', + }); + try { + writeFileSync(join(root, '.dkg', 'secret.txt'), 'file-token\n# comment\n'); + const cfg = loadDkgWorkspaceConfig(root); + assert.equal(cfg.token, 'file-token'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +// --- resolveDkgScope (no network paths only) ------------------------------ + +test('resolveDkgScope: no config, no env → no-config soft fallthrough', async () => { + const root = mkdtempSync(join(tmpdir(), 'dkg-source-test-')); + const prev = { + p: process.env.DKG_PROJECT, a: process.env.DKG_AGENT_URI, + }; + delete process.env.DKG_PROJECT; + delete process.env.DKG_AGENT_URI; + try { + const r = await resolveDkgScope({ root, force: true }); + assert.equal(r.reason, 'no-config'); + assert.equal(r.allowed.length, 0); + assert.equal(r.exemptions.length, 0); + } finally { + if (prev.p !== undefined) process.env.DKG_PROJECT = prev.p; + if (prev.a !== undefined) process.env.DKG_AGENT_URI = prev.a; + rmSync(root, { recursive: true, force: true }); + } +}); + +test('resolveDkgScope: project but no agent → no-agent', async () => { + const root = makeWorkspace({ projectId: 'p' }); + try { + const r = await resolveDkgScope({ root, force: true }); + assert.equal(r.reason, 'no-agent'); + assert.match(r.diagnostic, /agent\.uri/); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('resolveDkgScope: agent but no project → no-project', async () => { + const root = makeWorkspace({ agentUri: 'urn:agent:x' }); + try { + const r = await resolveDkgScope({ root, force: true }); + assert.equal(r.reason, 'no-project'); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +test('resolveDkgScope: bad daemon URL → daemon-unreachable, no throw', async () => { + // Pin to a port that nothing is listening on; verifies the timeout + + // catch path that turns network errors into a soft scope. + const root = makeWorkspace({ + projectId: 'p', agentUri: 'urn:agent:x', api: 'http://127.0.0.1:1', + }); + try { + const r = await resolveDkgScope({ root, force: true }); + assert.equal(r.reason, 'daemon-unreachable'); + assert.equal(r.allowed.length, 0); + } finally { rmSync(root, { recursive: true, force: true }); } +}); + +// --- describeScope -------------------------------------------------------- + +test('describeScope: ok scope mentions task title', () => { + const s = { + reason: 'ok', + tasks: [{ uri: 'urn:dkg:task:demo', title: 'Demo' }], + allowed: ['src/**'], + exemptions: [], + }; + const out = describeScope(s); + assert.match(out, /Demo/); + assert.match(out, /1 active task/); +}); + +test('describeScope: error scope surfaces reason', () => { + const s = { reason: 'no-active-task', diagnostic: 'no in_progress task', tasks: [], allowed: [], exemptions: [] }; + const out = describeScope(s); + assert.match(out, /no-active-task/); +}); diff --git a/agent-scope/lib/onboarding.mjs b/agent-scope/lib/onboarding.mjs deleted file mode 100644 index 635b4f78e..000000000 --- a/agent-scope/lib/onboarding.mjs +++ /dev/null @@ -1,235 +0,0 @@ -// Onboarding marker + clipboard helpers for `pnpm task start`. -// -// `pnpm task start` is the single onboarding flow: the CLI captures a -// task description from the user, then drops a one-shot marker file at -// `agent-scope/.pending-onboarding`. The next message the user sends in -// any chat makes the agent read the description, explore the repo, and -// propose a scope via a plan-mode AskQuestion. -// -// Flow: -// -// 1. `pnpm task start` reads a description from the user (single-Enter -// submit; multi-line pastes captured in full), then writes the -// marker. The marker contains both the trigger text AND the user's -// description, so the agent does not need to ask "describe the -// task" again. -// 2. The user sends any message in any chat. -// 3. The marker is delivered to the agent via two kinds of consumers — -// AUTHORITATIVE (read-and-delete) and BEST-EFFORT PEEK (read-only). -// Splitting them this way avoids a race where a mid-turn peek would -// otherwise delete the marker before the agent's visible context -// picked it up. -// -// Authoritative (delete + inject): -// (a) `sessionStart` hook — fires on a brand new chat. -// (b) `UserPromptSubmit` hook — Claude Code only, fires BEFORE each -// user prompt reaches the agent. No race. -// (c) `stop` hook — Cursor only, fires at end of a -// turn. Returns the payload as `followup_message`, which -// Cursor auto-submits as the next user message. This is the -// safety net for existing Cursor chats where the agent -// replied conversationally with no tool call. -// -// Best-effort peek (read-only, NO delete): -// (d) `postToolUse` hook — Cursor + Claude Code. Fires after -// any tool call. Fast-path injection via `additional_context`. -// Does NOT delete so mid-turn injection noise is harmless and -// the authoritative consumers remain in control of the -// lifecycle. -// -// 4. The marker is explicitly deleted by the lifecycle owners once the -// flow is resolved: -// - `pnpm task create --activate` (success = "I processed this") -// - `pnpm task clear` (user abandons the flow) -// This is the final cleanup step. -// -// 5. The agent follows the "Task onboarding protocol" (CLAUDE.md, -// .cursor/rules/agent-scope.mdc, AGENTS.md, GEMINI.md). -// -// Zero runtime deps. Pure-ish (spawnSync for clipboard; filesystem for marker). - -import { writeFileSync, readFileSync, existsSync, unlinkSync, rmSync } from 'node:fs'; -import { resolve } from 'node:path'; -import { spawnSync } from 'node:child_process'; -import { platform } from 'node:os'; - -export const ONBOARDING_MARKER_REL = 'agent-scope/.pending-onboarding'; -export const DESCRIPTION_OPEN = '=== USER TASK DESCRIPTION (already provided — do NOT ask again) ==='; -export const DESCRIPTION_CLOSE = '=== END DESCRIPTION ==='; - -// Build the marker / trigger payload. If `description` is provided, the -// agent is told the user has already described the task; otherwise the -// agent is told to ask for a description (used for tests + edge cases only -// — in practice the CLI refuses to drop a marker without a description). -// -// Keep the first line stable: hooks and rules key off the prefix -// `agent-scope: start task onboarding.`. -export function buildOnboardingTrigger({ description = '' } = {}) { - const desc = typeof description === 'string' ? description.trim() : ''; - const hasDesc = desc.length > 0; - - const descBlock = hasDesc - ? [ - '', - DESCRIPTION_OPEN, - desc, - DESCRIPTION_CLOSE, - '', - ] - : []; - - return [ - 'agent-scope: start task onboarding.', - '', - hasDesc - ? 'The user ran `pnpm task start` and has already provided their task description below. DO NOT ask them to describe it again — use the description as your brief.' - : 'The user ran `pnpm task start` but did not include a description. Ask them to describe the task in one short chat message before proceeding.', - ...descBlock, - 'Task onboarding protocol — follow EXACTLY (full text in CLAUDE.md,', - '.cursor/rules/agent-scope.mdc, AGENTS.md, GEMINI.md):', - '', - ' 1. Stop whatever you were about to do on this turn.', - ' 2. Delete `agent-scope/.pending-onboarding` if it still exists.', - hasDesc - ? ' 3. Read the description above. Do not ask the user to describe it.' - : ' 3. Ask in plain chat: "What are you working on?" Wait for reply.', - ' 4. Explore the codebase — Glob, Grep, Read, SemanticSearch, DKG —', - ' to find the files the task will touch.', - ' 5. Draft a conservative set of allowed globs:', - ' - inherit `base` (standard build-artefact exemptions)', - ' - append `!**/secrets.*` and `!**/.env*` safety denies', - ' - prefer whole-package globs (packages//**) over files', - ' when in doubt — over-scoping is safe, under-scoping causes', - ' constant denials mid-work.', - ' 6. Propose the scope via a SINGLE `AskQuestion` — ONE question, TWO', - ' options. Write it like you are asking a coworker. Three', - ' sentences max: one-line rephrase of the task, the scope you', - ' propose as a 3–5 item NUMBERED list (1) 2) 3) ...), then', - ' "Sound good?" Options (ids exactly):', - ' - go — "Yes, go with that"', - ' - custom_instruction — "Type what you want instead"', - '', - ' 7. On `go`: YOU run `pnpm task create --description "..." \\', - ' --allowed "..." --inherits base --activate` via the Shell tool.', - ' The afterShellExecution / PostToolUse-Bash hook has a narrow', - ' allowlist for this exact shape, so the manifest + active', - ' marker persist. The `--allowed` flags MUST match the globs you', - ' proposed verbatim. After success, continue with the real work', - ' in the same turn.', - '', - ' 8. On `custom_instruction`: ask the user in plain chat what they', - ' want changed (packages, globs, task id, whatever). Apply it to', - ' the draft and re-ask step 6 — still ONE question, TWO options.', - '', - 'Phrasing: short and natural. No ALL-CAPS banners, no architecture', - 'explanations, no emoji unless the user uses them first.', - '', - 'Your onboarding turn starts now. Skip any other pending work until the', - 'scope is approved or cancelled.', - ].join('\n'); -} - -// Description-less trigger, kept as an export for backwards compatibility -// (existing hooks inject this text; existing tests assert its shape). New -// code should call `buildOnboardingTrigger({ description })`. -export const ONBOARDING_TRIGGER_TEXT = buildOnboardingTrigger(); - -// Extract the description back out of a marker payload. Returns the -// description string, or '' if the marker had no description block. -// Tolerant of whitespace and trailing noise. -export function extractDescription(payload) { - if (typeof payload !== 'string' || !payload.length) return ''; - const open = payload.indexOf(DESCRIPTION_OPEN); - const close = payload.indexOf(DESCRIPTION_CLOSE); - if (open < 0 || close < 0 || close < open) return ''; - const start = open + DESCRIPTION_OPEN.length; - return payload.slice(start, close).trim(); -} - -// --------------------------------------------------------------------------- -// Marker file lifecycle -// --------------------------------------------------------------------------- - -export function onboardingMarkerPath(root) { - return resolve(root, ONBOARDING_MARKER_REL); -} - -export function writeOnboardingMarker(root, payload = ONBOARDING_TRIGGER_TEXT) { - const p = onboardingMarkerPath(root); - writeFileSync(p, payload, 'utf8'); - return p; -} - -export function hasOnboardingMarker(root) { - try { return existsSync(onboardingMarkerPath(root)); } catch { return false; } -} - -export function readOnboardingMarker(root) { - try { - const p = onboardingMarkerPath(root); - if (!existsSync(p)) return null; - return readFileSync(p, 'utf8'); - } catch { return null; } -} - -// Read-and-delete. Used by AUTHORITATIVE consumers only (sessionStart, -// stop, UserPromptSubmit). The postToolUse peek-hooks do NOT use this — -// see `.cursor/hooks/post-tool-use.mjs` for the race rationale. -export function consumeOnboardingMarker(root) { - const p = onboardingMarkerPath(root); - try { - if (!existsSync(p)) return null; - const payload = readFileSync(p, 'utf8'); - try { unlinkSync(p); } catch { try { rmSync(p, { force: true }); } catch {} } - return payload; - } catch { return null; } -} - -// Idempotent delete. Used by `pnpm task create --activate` and -// `pnpm task clear` to clean up a pending marker once the flow is -// resolved (task activated or abandoned). No-op if the marker is absent. -export function deleteOnboardingMarker(root) { - const p = onboardingMarkerPath(root); - try { - if (!existsSync(p)) return false; - try { unlinkSync(p); } catch { try { rmSync(p, { force: true }); } catch {} } - return true; - } catch { return false; } -} - -// --------------------------------------------------------------------------- -// Cross-platform clipboard copy (best-effort) -// --------------------------------------------------------------------------- - -// Try a chain of clipboard commands; first that succeeds wins. Returns -// { ok: true, method: 'pbcopy' } on success or { ok: false, reason } on -// failure. Always swallows errors — clipboard is a UX nicety, not a contract. -export function copyToClipboard(text) { - const os = platform(); - const attempts = []; - - if (os === 'darwin') { - attempts.push(['pbcopy', []]); - } else if (os === 'win32') { - attempts.push(['clip', []]); - } else if (os === 'linux') { - attempts.push(['wl-copy', []]); - attempts.push(['xclip', ['-selection', 'clipboard']]); - attempts.push(['xsel', ['--clipboard', '--input']]); - } - - attempts.push(['pbcopy', []]); - - for (const [cmd, args] of attempts) { - const res = spawnSync(cmd, args, { - input: text, - encoding: 'utf8', - stdio: ['pipe', 'ignore', 'ignore'], - timeout: 2000, - }); - if (res.status === 0 && !res.error) { - return { ok: true, method: cmd }; - } - } - return { ok: false, reason: 'no clipboard tool available on this system' }; -} diff --git a/agent-scope/lib/onboarding.test.mjs b/agent-scope/lib/onboarding.test.mjs deleted file mode 100644 index cbd50f0aa..000000000 --- a/agent-scope/lib/onboarding.test.mjs +++ /dev/null @@ -1,228 +0,0 @@ -import test from 'node:test'; -import assert from 'node:assert/strict'; -import { - mkdtempSync, mkdirSync, rmSync, existsSync, readFileSync, -} from 'node:fs'; -import { join } from 'node:path'; -import { tmpdir } from 'node:os'; - -import { - ONBOARDING_MARKER_REL, - ONBOARDING_TRIGGER_TEXT, - DESCRIPTION_OPEN, - DESCRIPTION_CLOSE, - buildOnboardingTrigger, - extractDescription, - onboardingMarkerPath, - writeOnboardingMarker, - hasOnboardingMarker, - readOnboardingMarker, - consumeOnboardingMarker, - deleteOnboardingMarker, - copyToClipboard, -} from './onboarding.mjs'; - -function mkRoot() { - const root = mkdtempSync(join(tmpdir(), 'as-onboard-')); - mkdirSync(join(root, 'agent-scope'), { recursive: true }); - return root; -} -function cleanup(root) { rmSync(root, { recursive: true, force: true }); } - -test('ONBOARDING_MARKER_REL is stable, hidden, under agent-scope/', () => { - assert.equal(ONBOARDING_MARKER_REL, 'agent-scope/.pending-onboarding'); -}); - -test('ONBOARDING_TRIGGER_TEXT starts with the canonical prefix and covers the protocol', () => { - assert.ok(ONBOARDING_TRIGGER_TEXT.length > 100); - assert.ok( - ONBOARDING_TRIGGER_TEXT.startsWith('agent-scope: start task onboarding'), - 'trigger must begin with the documented prefix', - ); - assert.ok(ONBOARDING_TRIGGER_TEXT.includes('Task onboarding protocol')); - assert.ok(ONBOARDING_TRIGGER_TEXT.includes('AskQuestion')); - assert.ok(ONBOARDING_TRIGGER_TEXT.includes('pnpm task create')); -}); - -test('buildOnboardingTrigger: without description → description-less trigger', () => { - const t = buildOnboardingTrigger(); - assert.equal(t, ONBOARDING_TRIGGER_TEXT); - assert.ok(!t.includes(DESCRIPTION_OPEN)); -}); - -test('buildOnboardingTrigger: embeds the description in a fenced block', () => { - const desc = 'Refactor peer sync in agent + core packages.'; - const t = buildOnboardingTrigger({ description: desc }); - assert.ok(t.includes(DESCRIPTION_OPEN)); - assert.ok(t.includes(DESCRIPTION_CLOSE)); - assert.ok(t.includes(desc)); - assert.ok(t.includes('DO NOT ask them to describe it again')); -}); - -test('buildOnboardingTrigger: preserves multi-line descriptions verbatim', () => { - const desc = 'line one\nline two\n\nline four'; - const t = buildOnboardingTrigger({ description: desc }); - assert.ok(t.includes(desc)); -}); - -test('buildOnboardingTrigger: trims leading/trailing whitespace on description', () => { - const t = buildOnboardingTrigger({ description: ' hello \n' }); - assert.ok(t.includes('hello')); - assert.ok(!t.includes(' hello'), 'leading spaces should be trimmed'); -}); - -test('buildOnboardingTrigger: empty string description → treated as missing', () => { - const t = buildOnboardingTrigger({ description: ' \n ' }); - assert.equal(t, ONBOARDING_TRIGGER_TEXT); -}); - -test('extractDescription: round-trips through a smart trigger', () => { - const desc = 'Refactor peer sync\nwith workspace auth.'; - const t = buildOnboardingTrigger({ description: desc }); - assert.equal(extractDescription(t), desc); -}); - -test('extractDescription: returns empty string for a description-less trigger', () => { - assert.equal(extractDescription(ONBOARDING_TRIGGER_TEXT), ''); -}); - -test('extractDescription: tolerates nulls and non-strings', () => { - assert.equal(extractDescription(null), ''); - assert.equal(extractDescription(undefined), ''); - assert.equal(extractDescription(''), ''); - assert.equal(extractDescription({}), ''); -}); - -test('extractDescription: returns empty when markers are malformed (close before open)', () => { - const bad = `${DESCRIPTION_CLOSE} text ${DESCRIPTION_OPEN}`; - assert.equal(extractDescription(bad), ''); -}); - -test('onboardingMarkerPath joins repo root with the relative marker path', () => { - const root = mkRoot(); - try { - assert.equal(onboardingMarkerPath(root), join(root, ONBOARDING_MARKER_REL)); - } finally { cleanup(root); } -}); - -test('marker: write creates the file with the given payload', () => { - const root = mkRoot(); - try { - writeOnboardingMarker(root, 'hello'); - assert.ok(existsSync(onboardingMarkerPath(root))); - assert.equal(readFileSync(onboardingMarkerPath(root), 'utf8'), 'hello'); - } finally { cleanup(root); } -}); - -test('marker: write defaults to the canonical trigger text', () => { - const root = mkRoot(); - try { - writeOnboardingMarker(root); - assert.equal( - readFileSync(onboardingMarkerPath(root), 'utf8'), - ONBOARDING_TRIGGER_TEXT, - ); - } finally { cleanup(root); } -}); - -test('marker: hasOnboardingMarker reflects filesystem state', () => { - const root = mkRoot(); - try { - assert.equal(hasOnboardingMarker(root), false); - writeOnboardingMarker(root, 'x'); - assert.equal(hasOnboardingMarker(root), true); - } finally { cleanup(root); } -}); - -test('marker: readOnboardingMarker returns null when absent', () => { - const root = mkRoot(); - try { - assert.equal(readOnboardingMarker(root), null); - } finally { cleanup(root); } -}); - -test('marker: readOnboardingMarker returns the payload when present', () => { - const root = mkRoot(); - try { - writeOnboardingMarker(root, 'payload-123'); - assert.equal(readOnboardingMarker(root), 'payload-123'); - } finally { cleanup(root); } -}); - -test('marker: consumeOnboardingMarker returns payload AND deletes the file (one-shot)', () => { - const root = mkRoot(); - try { - writeOnboardingMarker(root, 'once'); - assert.ok(existsSync(onboardingMarkerPath(root))); - assert.equal(consumeOnboardingMarker(root), 'once'); - assert.equal(existsSync(onboardingMarkerPath(root)), false); - assert.equal(consumeOnboardingMarker(root), null); - } finally { cleanup(root); } -}); - -test('marker: consumeOnboardingMarker on missing file returns null without throwing', () => { - const root = mkRoot(); - try { - assert.equal(consumeOnboardingMarker(root), null); - } finally { cleanup(root); } -}); - -test('marker: readOnboardingMarker is read-only — does NOT delete (peek semantics)', () => { - // This is the critical invariant for postToolUse peek hooks. If this - // regresses, existing-chat onboarding in Cursor breaks again because - // the marker gets deleted mid-turn before the agent sees it. - const root = mkRoot(); - try { - writeOnboardingMarker(root, 'peek me'); - assert.equal(readOnboardingMarker(root), 'peek me'); - assert.ok(existsSync(onboardingMarkerPath(root)), 'marker must survive a read'); - // Repeated reads must keep returning the payload until someone - // authoritative deletes it. - assert.equal(readOnboardingMarker(root), 'peek me'); - assert.equal(readOnboardingMarker(root), 'peek me'); - assert.ok(existsSync(onboardingMarkerPath(root))); - } finally { cleanup(root); } -}); - -test('marker: deleteOnboardingMarker removes the file and returns true', () => { - const root = mkRoot(); - try { - writeOnboardingMarker(root, 'bye'); - assert.equal(deleteOnboardingMarker(root), true); - assert.equal(existsSync(onboardingMarkerPath(root)), false); - } finally { cleanup(root); } -}); - -test('marker: deleteOnboardingMarker on missing file is a no-op returning false', () => { - const root = mkRoot(); - try { - assert.equal(deleteOnboardingMarker(root), false); - } finally { cleanup(root); } -}); - -test('marker: delete is idempotent (safe to call twice)', () => { - const root = mkRoot(); - try { - writeOnboardingMarker(root, 'x'); - assert.equal(deleteOnboardingMarker(root), true); - assert.equal(deleteOnboardingMarker(root), false); - assert.equal(existsSync(onboardingMarkerPath(root)), false); - } finally { cleanup(root); } -}); - -test('copyToClipboard returns a structured result (never throws)', () => { - const result = copyToClipboard('test payload'); - assert.ok(result && typeof result === 'object'); - assert.ok('ok' in result); - if (result.ok) { - assert.equal(typeof result.method, 'string'); - } else { - assert.equal(typeof result.reason, 'string'); - } -}); - -test('copyToClipboard tolerates empty string input', () => { - const result = copyToClipboard(''); - assert.ok(result && typeof result === 'object'); - assert.ok('ok' in result); -}); diff --git a/agent-scope/lib/prompter.mjs b/agent-scope/lib/prompter.mjs deleted file mode 100644 index fa870b917..000000000 --- a/agent-scope/lib/prompter.mjs +++ /dev/null @@ -1,171 +0,0 @@ -// Tiny interactive-prompter built on readline. Zero external deps so it -// works from a freshly-cloned repo. The CLI uses it for `pnpm task start`; -// it's also exported in case anyone wants to drop another wizard on top. -// -// Design rules: -// - Every prompt has a default that's used on blank input. -// - Nothing here mutates global state (process.stdin etc.) — the input/ -// output streams are injectable so tests can feed canned stdin. -// - `close()` is safe to call multiple times. - -import { createInterface } from 'node:readline'; - -export function createPrompter({ - input = process.stdin, - output = process.stdout, -} = {}) { - const rl = createInterface({ input, output, terminal: false }); - const buffered = []; - const waiters = []; - let closed = false; - - rl.on('line', line => { - if (waiters.length) waiters.shift()(line); - else buffered.push(line); - }); - rl.on('close', () => { - closed = true; - while (waiters.length) waiters.shift()(''); - }); - - const readLine = () => new Promise(r => { - if (buffered.length) return r(buffered.shift()); - if (closed) return r(''); - waiters.push(r); - }); - - // Non-blocking: resolves with the next line if one arrives within - // `timeoutMs`, otherwise null. Used for paste-detection where we want - // to treat typed-and-Enter input as single-line but still capture - // pasted multi-line content (terminal pastes deliver each line as a - // separate `line` event within a few milliseconds). - const tryReadLine = (timeoutMs) => new Promise(resolve => { - if (buffered.length) return resolve(buffered.shift()); - if (closed) return resolve(null); - let settled = false; - const waiter = (line) => { - if (settled) return; - settled = true; - clearTimeout(t); - resolve(line); - }; - waiters.push(waiter); - const t = setTimeout(() => { - if (settled) return; - settled = true; - const idx = waiters.indexOf(waiter); - if (idx >= 0) waiters.splice(idx, 1); - resolve(null); - }, timeoutMs); - }); - - const write = (s) => { try { output.write(s); } catch { /* ignore */ } }; - - async function ask(prompt, { default: dflt = '' } = {}) { - write(prompt); - const line = await readLine(); - const v = (line ?? '').trim(); - return v.length ? v : dflt; - } - - async function askYesNo(prompt, { default: dflt = true } = {}) { - const tag = dflt ? '[Y/n]' : '[y/N]'; - const ans = (await ask(`${prompt} ${tag} `)).toLowerCase(); - if (!ans) return dflt; - if (/^y(es)?$/.test(ans)) return true; - if (/^n(o)?$/.test(ans)) return false; - return dflt; - } - - async function askChoice(prompt, options, { default: dflt } = {}) { - // options: [{ key, label }] - const byKey = new Map(options.map(o => [o.key.toLowerCase(), o])); - const display = options - .map(o => (o.key === dflt ? o.key.toUpperCase() : o.key)) - .join('/'); - for (const o of options) write(` [${o.key}] ${o.label}\n`); - const ans = (await ask(`Choice [${display}]: `)).toLowerCase(); - if (!ans && dflt) return dflt; - if (byKey.has(ans)) return byKey.get(ans).key; - return dflt || options[0].key; - } - - // Reads a list of integers (1-based) entered space- or comma-separated. - // Returns a de-duped sorted array of indices within [1, count]. - async function askMultiNumber(prompt, count, { default: dflt = [] } = {}) { - const defaultStr = dflt.length ? dflt.join(' ') : ''; - const raw = await ask(prompt, { default: defaultStr }); - if (!raw) return []; - if (/^none$/i.test(raw) || /^-$/.test(raw)) return []; - const nums = raw - .split(/[\s,]+/) - .filter(Boolean) - .map(s => parseInt(s, 10)) - .filter(n => Number.isInteger(n) && n >= 1 && n <= count); - return [...new Set(nums)].sort((a, b) => a - b); - } - - // Read a task description with single-Enter submission and paste - // detection. Flow: - // 1. Block for the first non-empty line. Blank lines before any - // content are ignored, up to `maxBlankBeforeContent`. - // 2. After the first line, poll `tryReadLine(pasteQuietMs)` — if - // another line arrives inside that window it's part of a multi- - // line paste (terminal pastes deliver each line as a separate - // `line` event within a few ms). Keep appending; each new line - // resets the window. - // 3. As soon as the quiet window expires with no new line, stop. - // - // This means typing one line + Enter submits immediately (no more - // "press Enter twice"), while a multi-paragraph paste still gets - // captured in full. Trailing blank lines (common at the end of a - // paste) are trimmed. Internal blank lines (paragraph breaks) are - // preserved. - async function askPasteableDescription(prompt = '> ', { - pasteQuietMs = 80, - maxLines = 2000, - maxBlankBeforeContent = 3, - } = {}) { - const lines = []; - let emptyBeforeContent = 0; - - while (lines.length === 0) { - const line = await ask(prompt); - if (line && line.trim().length) { lines.push(line); break; } - if (++emptyBeforeContent >= maxBlankBeforeContent) return ''; - } - - while (lines.length < maxLines) { - const next = await tryReadLine(pasteQuietMs); - if (next === null) break; - if (next === '') { lines.push(''); continue; } - lines.push(next); - } - - while (lines.length && !lines[lines.length - 1].trim()) lines.pop(); - return lines.join('\n'); - } - - // Read free-text lines until a blank line. Useful for "extra globs". - async function askLines(headline, { hint } = {}) { - if (headline) write(headline + '\n'); - if (hint) write(` (${hint})\n`); - const lines = []; - for (;;) { - write(' > '); - const line = await readLine(); - if (line === null || line === undefined) break; - const v = line.trim(); - if (!v) break; - lines.push(v); - } - return lines; - } - - function close() { try { rl.close(); } catch { /* ignore */ } } - - return { - ask, askYesNo, askChoice, askMultiNumber, askLines, - askPasteableDescription, tryReadLine, close, - }; -} diff --git a/agent-scope/lib/prompter.test.mjs b/agent-scope/lib/prompter.test.mjs deleted file mode 100644 index fdd01cb5c..000000000 --- a/agent-scope/lib/prompter.test.mjs +++ /dev/null @@ -1,221 +0,0 @@ -// Unit tests for the tiny prompter. Focuses on the paste-detection -// primitives used by `pnpm task start`'s description reader — -// i.e. the `tryReadLine(timeoutMs)` helper and its interaction with -// the blocking `ask()` path. - -import test from 'node:test'; -import assert from 'node:assert/strict'; -import { PassThrough } from 'node:stream'; -import { createPrompter } from './prompter.mjs'; - -function makePrompter() { - const input = new PassThrough(); - const output = new PassThrough(); - // Drain output so writes don't back-pressure the PassThrough. - output.on('data', () => {}); - const p = createPrompter({ input, output }); - return { p, input, output }; -} - -function feed(input, line) { input.write(`${line}\n`); } - -test('tryReadLine: buffered line resolves synchronously (same tick)', async () => { - const { p, input } = makePrompter(); - feed(input, 'first'); - // Give the readline transform a tick to push the line event. - await new Promise(r => setImmediate(r)); - const got = await p.tryReadLine(500); - assert.equal(got, 'first'); - p.close(); -}); - -test('tryReadLine: returns null after timeout when no input', async () => { - const { p } = makePrompter(); - const t0 = Date.now(); - const got = await p.tryReadLine(60); - const elapsed = Date.now() - t0; - assert.equal(got, null); - // Should settle promptly — allow generous slack for slow CI. - assert.ok(elapsed >= 55, `expected >=55ms, got ${elapsed}`); - assert.ok(elapsed <= 400, `expected <=400ms, got ${elapsed}`); - p.close(); -}); - -test('tryReadLine: resolves when line arrives inside the window', async () => { - const { p, input } = makePrompter(); - setTimeout(() => feed(input, 'late-but-not-too-late'), 20); - const got = await p.tryReadLine(200); - assert.equal(got, 'late-but-not-too-late'); - p.close(); -}); - -test('tryReadLine: does NOT steal from later waiters after timeout', async () => { - const { p, input } = makePrompter(); - - // First call times out because nothing arrives. - const first = await p.tryReadLine(40); - assert.equal(first, null); - - // Now a real line arrives — it should route to the next reader, - // not some ghost of the timed-out waiter. - feed(input, 'hello'); - const got = await p.tryReadLine(200); - assert.equal(got, 'hello'); - p.close(); -}); - -test('tryReadLine: resolves null once the stream has been closed', async () => { - const { p, input } = makePrompter(); - input.end(); - // Let the readline 'close' event propagate. - await new Promise(r => setImmediate(r)); - const got = await p.tryReadLine(100); - assert.equal(got, null); - p.close(); -}); - -test('ask + tryReadLine compose: first line blocks, then we poll the tail', async () => { - const { p, input } = makePrompter(); - - // Mimic the smart-mode description reader: block for the first line, - // then collect any immediately-following lines (paste-detection). - setTimeout(() => { - feed(input, 'line A'); - feed(input, 'line B'); - feed(input, 'line C'); - }, 5); - - const first = await p.ask('> '); - const more = []; - for (;;) { - const next = await p.tryReadLine(40); - if (next === null) break; - more.push(next); - } - assert.equal(first, 'line A'); - assert.deepEqual(more, ['line B', 'line C']); - p.close(); -}); - -test('ask returns blank when stream closes with no input', async () => { - const { p, input } = makePrompter(); - setTimeout(() => input.end(), 10); - const got = await p.ask('> '); - assert.equal(got, ''); - p.close(); -}); - -// --- askPasteableDescription: single-Enter submission + paste detection --- - -test('askPasteableDescription: single line + one Enter submits immediately', async () => { - const { p, input } = makePrompter(); - setTimeout(() => feed(input, 'Refactor peer sync for workspace auth'), 5); - const got = await p.askPasteableDescription('> ', { pasteQuietMs: 40 }); - assert.equal(got, 'Refactor peer sync for workspace auth'); - p.close(); -}); - -test('askPasteableDescription: multi-line paste is captured in full', async () => { - const { p, input } = makePrompter(); - setTimeout(() => { - feed(input, 'line one'); - feed(input, 'line two'); - feed(input, 'line three'); - }, 5); - const got = await p.askPasteableDescription('> ', { pasteQuietMs: 60 }); - assert.equal(got, 'line one\nline two\nline three'); - p.close(); -}); - -test('askPasteableDescription: blank line in middle of paste is preserved', async () => { - const { p, input } = makePrompter(); - setTimeout(() => { - feed(input, 'paragraph 1'); - feed(input, ''); - feed(input, 'paragraph 2'); - }, 5); - const got = await p.askPasteableDescription('> ', { pasteQuietMs: 60 }); - assert.equal(got, 'paragraph 1\n\nparagraph 2'); - p.close(); -}); - -test('askPasteableDescription: trailing blank lines are trimmed', async () => { - const { p, input } = makePrompter(); - setTimeout(() => { - feed(input, 'content'); - feed(input, ''); - feed(input, ''); - }, 5); - const got = await p.askPasteableDescription('> ', { pasteQuietMs: 50 }); - assert.equal(got, 'content'); - p.close(); -}); - -test('askPasteableDescription: leading blanks ignored up to maxBlankBeforeContent', async () => { - const { p, input } = makePrompter(); - setTimeout(() => { - feed(input, ''); - feed(input, ''); - feed(input, 'finally'); - }, 5); - const got = await p.askPasteableDescription('> ', { - pasteQuietMs: 50, - maxBlankBeforeContent: 5, - }); - assert.equal(got, 'finally'); - p.close(); -}); - -test('askPasteableDescription: bails empty-string after maxBlankBeforeContent', async () => { - const { p, input } = makePrompter(); - setTimeout(() => { - feed(input, ''); - feed(input, ''); - feed(input, ''); - }, 5); - const got = await p.askPasteableDescription('> ', { - pasteQuietMs: 50, - maxBlankBeforeContent: 3, - }); - assert.equal(got, ''); - p.close(); -}); - -test('askPasteableDescription: late-arriving line INSIDE quiet window is appended', async () => { - const { p, input } = makePrompter(); - setTimeout(() => feed(input, 'first'), 5); - setTimeout(() => feed(input, 'second (just inside window)'), 40); - const got = await p.askPasteableDescription('> ', { pasteQuietMs: 100 }); - assert.equal(got, 'first\nsecond (just inside window)'); - p.close(); -}); - -test('askPasteableDescription: line arriving AFTER quiet window is NOT appended', async () => { - const { p, input } = makePrompter(); - setTimeout(() => feed(input, 'only this'), 5); - // Give enough time for the first read + the quiet window to elapse - // before sending the second line. - setTimeout(() => feed(input, 'too late, separate turn'), 200); - const got = await p.askPasteableDescription('> ', { pasteQuietMs: 40 }); - assert.equal(got, 'only this'); - p.close(); -}); - -test('askPasteableDescription: respects maxLines cap on a runaway paste', async () => { - const { p, input } = makePrompter(); - // Keep feeding lines forever (every few ms) — cap stops the reader. - let i = 0; - const iv = setInterval(() => feed(input, `L${i++}`), 5); - try { - const got = await p.askPasteableDescription('> ', { - pasteQuietMs: 40, - maxLines: 5, - }); - const lines = got.split('\n'); - assert.equal(lines.length, 5); - assert.ok(lines.every(l => /^L\d+$/.test(l)), `unexpected lines: ${got}`); - } finally { - clearInterval(iv); - p.close(); - } -}); diff --git a/agent-scope/lib/scope.mjs b/agent-scope/lib/scope.mjs index 6eb704960..f9c9920e2 100644 --- a/agent-scope/lib/scope.mjs +++ b/agent-scope/lib/scope.mjs @@ -1,16 +1,32 @@ // Shared scope-check library. Zero runtime dependencies; must work from -// Cursor hooks, git hooks, CLI, and CI. Node 20+. +// Cursor hooks, Claude Code hooks, git hooks, CLI, and CI. Node 20+. // -// Bootstrap modes (disables hardcoded protection): +// Source of truth (post-DKG-integration): +// The agent's "active scope" is derived live from the local DKG daemon — +// specifically, the union of `tasks:scopedToPath` globs across every +// `tasks:Task` whose current status is `"in_progress"` AND which is +// attributed to this agent (`prov:wasAttributedTo `). +// See `agent-scope/lib/dkg-source.mjs` for the SPARQL + cache layer. +// +// Legacy local files (`agent-scope/active`, `agent-scope/tasks/*.json`) +// are GONE — there is no fallback path. If the daemon is unreachable or +// the workspace's `.dkg/config.yaml` is incomplete, the guard falls open +// for non-protected paths (only the hardcoded protected list still +// applies). Hardcoded protected paths defend the guard's own files; +// they're disabled only by bootstrap mode. +// +// Bootstrap mode (disables hardcoded protection): // 1. env: AGENT_SCOPE_BOOTSTRAP=1 // 2. file: agent-scope/.bootstrap-token exists -// Token file is itself protected — only the human can create/remove it from -// outside the agent sandbox. Intentional convention: git-visible. +// The token file is itself protected — only the human can create / remove +// it from outside the agent sandbox. -import { readFileSync, existsSync, readdirSync, statSync } from 'node:fs'; +import { existsSync } from 'node:fs'; import { resolve, relative, sep, dirname, isAbsolute } from 'node:path'; -import { execFileSync } from 'node:child_process'; -import { fileURLToPath } from 'node:url'; +import { fileURLToPath, pathToFileURL } from 'node:url'; +import { + resolveDkgScope, readCachedScopeSync, loadDkgWorkspaceConfig, describeScope, +} from './dkg-source.mjs'; // --------------------------------------------------------------------------- // Node version check @@ -24,13 +40,13 @@ export function checkNodeVersion(minMajor = MIN_NODE_MAJOR) { if (major < minMajor) { throw new Error( `agent-scope requires Node ${minMajor}+ but found ${process.version}. ` + - `Update Node (nvm install 22) and retry.` + `Update Node (nvm install 22) and retry.`, ); } } // --------------------------------------------------------------------------- -// Protected paths +// Protected paths (always-on, regardless of active task) // --------------------------------------------------------------------------- export const PROTECTED_PATTERNS = [ @@ -40,10 +56,6 @@ export const PROTECTED_PATTERNS = [ '.claude/hooks/**', '.claude/settings.json', 'agent-scope/lib/**', - 'agent-scope/bin/**', - 'agent-scope/schema/**', - 'agent-scope/tasks/**', - 'agent-scope/active', 'agent-scope/.bootstrap-token', 'AGENTS.md', 'GEMINI.md', @@ -91,7 +103,7 @@ export function coversProtected(relPath, root) { } // --------------------------------------------------------------------------- -// Glob +// Glob → RegExp (no deps) // --------------------------------------------------------------------------- function globToRegex(glob) { @@ -166,178 +178,134 @@ export function normalizeToRepoPath(root, p) { } // --------------------------------------------------------------------------- -// Active task resolution +// Active scope resolution (DKG-backed) // --------------------------------------------------------------------------- - -function readFileOrNull(p) { - try { return readFileSync(p, 'utf8'); } catch { return null; } -} - -function safeGit(root, args) { - try { - return execFileSync('git', args, { - cwd: root, - encoding: 'utf8', - stdio: ['ignore', 'pipe', 'ignore'], - timeout: 3000, - }).trim(); - } catch { - return null; - } -} - -const BRANCH_TASK_RE = /^(?:task|agent-scope)\/([a-z0-9][a-z0-9-_.]{0,63})(?:\/|$)/; - -export function detectTaskFromBranch(root) { - const branch = safeGit(root, ['rev-parse', '--abbrev-ref', 'HEAD']); - if (!branch || branch === 'HEAD') return null; - const m = BRANCH_TASK_RE.exec(branch); - return m ? m[1] : null; +// +// Two flavours: +// +// `resolveActiveScope({ root, force })` async — queries the daemon if +// cache is stale; preferred for +// session-start where we want a +// fresh snapshot. +// +// `resolveActiveScopeSync({ root })` sync — reads cache only; falls +// through to a "soft" empty +// scope if cache missing or +// expired. Use from sync-only +// hook contexts (rare); normally +// prefer the async variant. +// +// Both return a "synthetic task" object compatible with the legacy +// `loadTask` shape, so callers just keep using `checkPath(task, ...)`. +// +// Synthetic task shape: +// { +// id: , +// dkgTaskUris: [, ...], +// description: , +// allowed: [...positive globs unioned across all in_progress tasks], +// exemptions: [...negative ('!...') globs unioned across all in_progress tasks], +// reason: 'ok' | 'no-active-task' | 'daemon-unreachable' | ..., +// diagnostic: optional string for surfacing in denial messages, +// } +// +// `reason !== 'ok'` is NOT itself an error — it just means "no scope is +// active right now". Callers decide whether that means deny-everything or +// allow-everything based on their own policy. Current policy: no active +// scope ⇒ allow non-protected writes (legacy default before agent-scope +// took over). The team can tighten that later by emitting a `dkg:setting` +// triple on the project. + +export async function resolveActiveScope({ root, force = false } = {}) { + const repoRoot = root || resolveRepoRoot(); + const dkg = await resolveDkgScope({ root: repoRoot, force }); + return synthesisTask(dkg); } -export function detectTaskFromGitConfig(root) { - const v = safeGit(root, ['config', '--get', 'agent-scope.task']); - return v || null; +export function resolveActiveScopeSync({ root } = {}) { + const repoRoot = root || resolveRepoRoot(); + const dkg = readCachedScopeSync({ root: repoRoot }); + return synthesisTask(dkg); } -export function resolveActiveTaskId(root, opts = {}) { - const fromEnv = process.env.AGENT_SCOPE_TASK; - if (fromEnv && fromEnv.trim()) return { id: fromEnv.trim(), source: 'env' }; - - const activeFile = resolve(root, 'agent-scope/active'); - const fromFile = readFileOrNull(activeFile); - if (fromFile && fromFile.trim()) return { id: fromFile.trim(), source: 'file' }; - - if (!opts.noBranch) { - const fromBranch = detectTaskFromBranch(root); - if (fromBranch) return { id: fromBranch, source: 'branch' }; +function synthesisTask(dkg) { + const tasks = Array.isArray(dkg.tasks) ? dkg.tasks : []; + if (tasks.length === 0) { + return { + id: null, + dkgTaskUris: [], + description: dkg.diagnostic || 'No in_progress task', + allowed: [], + exemptions: [], + reason: dkg.reason || 'no-active-task', + diagnostic: dkg.diagnostic || null, + agentUri: dkg.agentUri || null, + projectId: dkg.projectId || null, + stale: !!dkg.stale, + fromCache: !!dkg.fromCache, + }; } - - if (!opts.noGitConfig) { - const fromCfg = detectTaskFromGitConfig(root); - if (fromCfg) return { id: fromCfg, source: 'git-config' }; - } - - return { id: null, source: 'none' }; -} - -export function getActiveTaskId(root) { - return resolveActiveTaskId(root).id; + const niceId = tasks.length === 1 + ? tasks[0].uri.split(':').pop() + : `${tasks.length} in-progress tasks`; + const description = tasks.length === 1 + ? tasks[0].title + : tasks.map((t) => `${t.title}`).join(' · '); + return { + id: niceId, + dkgTaskUris: tasks.map((t) => t.uri), + tasks, + description, + allowed: dkg.allowed, + exemptions: dkg.exemptions, + reason: 'ok', + diagnostic: null, + agentUri: dkg.agentUri, + projectId: dkg.projectId, + stale: !!dkg.stale, + fromCache: !!dkg.fromCache, + }; } // --------------------------------------------------------------------------- -// Manifest loading + validation + inheritance +// Backwards-compatible shims // --------------------------------------------------------------------------- - -export function listTasks(root) { - const dir = resolve(root, 'agent-scope/tasks'); - if (!existsSync(dir)) return []; - return readdirSync(dir) - .filter(f => f.endsWith('.json')) - .map(f => f.replace(/\.json$/, '')) - .sort(); -} - -const ALLOWED_KEYS = ['id','description','owner','created','allowed','exemptions','notes','dkg','inherits']; - -export function validateManifest(obj, expectedId) { - const errors = []; - if (!obj || typeof obj !== 'object' || Array.isArray(obj)) { - errors.push('manifest must be a JSON object'); - return errors; - } - if (typeof obj.id !== 'string' || !/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(obj.id)) { - errors.push('id must be a string matching /^[a-z0-9][a-z0-9-_.]{0,63}$/'); - } - if (expectedId && obj.id && obj.id !== expectedId) { - errors.push(`id '${obj.id}' does not match filename '${expectedId}'`); - } - const hasInherits = Array.isArray(obj.inherits) && obj.inherits.length > 0; - const hasAllowed = Array.isArray(obj.allowed); - const hasExemptions = Array.isArray(obj.exemptions) && obj.exemptions.length > 0; - if (!hasAllowed && !hasInherits && !hasExemptions) { - errors.push('at least one of allowed / inherits / exemptions must be provided'); - } - if (obj.allowed !== undefined) { - if (!Array.isArray(obj.allowed)) errors.push('allowed must be an array'); - else obj.allowed.forEach((p, i) => { - if (typeof p !== 'string' || !p.length) errors.push(`allowed[${i}] must be a non-empty string`); - }); - } - if (obj.exemptions !== undefined) { - if (!Array.isArray(obj.exemptions)) errors.push('exemptions must be an array'); - else obj.exemptions.forEach((p, i) => { - if (typeof p !== 'string' || !p.length) errors.push(`exemptions[${i}] must be a non-empty string`); - }); - } - if (obj.inherits !== undefined) { - if (!Array.isArray(obj.inherits)) errors.push('inherits must be an array of task ids'); - else obj.inherits.forEach((id, i) => { - if (typeof id !== 'string' || !/^[a-z0-9][a-z0-9-_.]{0,63}$/.test(id)) { - errors.push(`inherits[${i}] must match /^[a-z0-9][a-z0-9-_.]{0,63}$/`); - } - }); - } - for (const k of Object.keys(obj)) { - if (!ALLOWED_KEYS.includes(k)) errors.push(`unknown property: ${k}`); - } - return errors; -} - -function loadAndResolve(root, id, seen = new Set(), chain = []) { - if (seen.has(id)) { - throw new Error(`inheritance cycle detected: ${[...chain, id].join(' -> ')}`); - } - seen.add(id); - - const manifestPath = resolve(root, 'agent-scope/tasks', `${id}.json`); - if (!existsSync(manifestPath)) { - throw new Error(`Task manifest not found: ${manifestPath}` + (chain.length ? ` (inherited from ${chain.join(' -> ')})` : '')); - } - const raw = readFileSync(manifestPath, 'utf8'); - let parsed; - try { parsed = JSON.parse(raw); } - catch (e) { throw new Error(`Task manifest is not valid JSON: ${manifestPath}: ${e.message}`); } - - const errors = validateManifest(parsed, id); - if (errors.length) { - throw new Error(`Invalid task manifest ${manifestPath}:\n - ${errors.join('\n - ')}`); - } - parsed.allowed = parsed.allowed || []; - parsed.exemptions = parsed.exemptions || []; - - const merged = { allowed: [], exemptions: [] }; - for (const parentId of parsed.inherits || []) { - const parent = loadAndResolve(root, parentId, new Set(seen), [...chain, id]); - merged.allowed.push(...parent.allowed); - merged.exemptions.push(...parent.exemptions); +// +// Older hook code calls `resolveActiveTaskId(root)` then `loadTask(root, id)` +// then `checkPath(task, rel, root)`. With the DKG flip, those calls collapse +// into a single async query — but we keep the names so callers don't all +// need to change at once. +// +// `resolveActiveTaskId(root)` → sync-only, reads cache; returns +// `{ id, source, scope }`. +// `loadTask(root, id, scope?)` → no-op if `scope` is passed (we already +// resolved it). When `scope` is omitted, +// does a sync cache read for backwards +// compatibility. +// `checkPath(task, ...)` → unchanged (works on the synthetic task). + +export function resolveActiveTaskId(root, _opts = {}) { + const scope = resolveActiveScopeSync({ root }); + if (scope.reason === 'ok') { + return { id: scope.id, source: scope.fromCache ? 'dkg-cache' : 'dkg', scope }; } - merged.allowed.push(...parsed.allowed); - merged.exemptions.push(...parsed.exemptions); - - return { - ...parsed, - allowed: dedupe(merged.allowed), - exemptions: dedupe(merged.exemptions), - __path: manifestPath, - __inheritedFrom: parsed.inherits || [], - }; + return { id: null, source: scope.reason, scope }; } -function dedupe(arr) { - const seen = new Set(); - const out = []; - for (const x of arr) { if (!seen.has(x)) { seen.add(x); out.push(x); } } - return out; +export function loadTask(root, _id, scope) { + if (scope) return scope; + // Last-ditch sync read of cache when caller didn't pass a pre-resolved + // scope. Hooks that have access to async should prefer + // `resolveActiveScope({ root, force })`. + return resolveActiveScopeSync({ root }); } -export function loadTask(root, id) { - if (!id) return null; - return loadAndResolve(root, id); +export function getActiveTaskId(root) { + return resolveActiveTaskId(root).id; } // --------------------------------------------------------------------------- -// Core decision +// Core path decision // --------------------------------------------------------------------------- export function checkPath(task, relPath, root) { @@ -346,7 +314,9 @@ export function checkPath(task, relPath, root) { if (checkProtected(relPath, root) === 'deny') return 'protected'; - if (!task) return 'allow'; + // No active scope ⇒ allow non-protected writes (soft default before any + // task is in_progress; matches the legacy file-based behaviour). + if (!task || task.reason !== 'ok') return 'allow'; if (matchAnyNegation(task.allowed, relPath)) return 'deny'; if (matchAnyNegation(task.exemptions, relPath)) return 'deny'; @@ -370,48 +340,64 @@ export function explainDeny(task, relPath, decision) { `in their own terminal, or set AGENT_SCOPE_BOOTSTRAP=1 in their env).`, ``, `Protected patterns:`, - ...PROTECTED_PATTERNS.map(p => ` - ${p}`), + ...PROTECTED_PATTERNS.map((p) => ` - ${p}`), ].join('\n'); } - if (!task) return ''; + if (!task || task.reason !== 'ok') return ''; - const positives = (task.allowed || []).filter(p => !p.startsWith('!')); - const negatives = (task.allowed || []).filter(p => p.startsWith('!')) - .concat((task.exemptions || []).filter(p => p.startsWith('!'))); - const exemptions = (task.exemptions || []).filter(p => !p.startsWith('!')); + const positives = (task.allowed || []).filter((p) => !p.startsWith('!')); + const negatives = (task.allowed || []).filter((p) => p.startsWith('!')) + .concat((task.exemptions || []).filter((p) => p.startsWith('!'))); + const exemptions = (task.exemptions || []).filter((p) => !p.startsWith('!')); + + const header = task.dkgTaskUris && task.dkgTaskUris.length === 1 + ? `Active in-progress task: ${task.dkgTaskUris[0]} — ${task.description || ''}` + : `Active in-progress tasks (${task.dkgTaskUris?.length || 0}):\n${(task.tasks || []).map((t) => ` - ${t.uri} — ${t.title}`).join('\n')}`; const lines = [ `OUT OF TASK SCOPE.`, - `Active task: ${task.id} — ${task.description || ''}`, + header, `Denied path: ${relPath}`, ``, - `This task only permits writes to paths matching:`, - ...(positives.length ? positives.map(p => ` - ${p}`) : [' (nothing)']), + `The current scope only permits writes to paths matching:`, + ...(positives.length ? positives.map((p) => ` - ${p}`) : [' (nothing)']), ]; if (exemptions.length) { - lines.push('', 'Exempted patterns (always allowed):', ...exemptions.map(p => ` - ${p}`)); + lines.push('', 'Exempted patterns (always allowed):', ...exemptions.map((p) => ` - ${p}`)); } if (negatives.length) { - lines.push('', 'Explicit deny patterns:', ...negatives.map(p => ` - ${p}`)); + lines.push('', 'Explicit deny patterns:', ...negatives.map((p) => ` - ${p}`)); } lines.push( '', - `If this change is needed for the current task, STOP and ask the user for`, - `explicit approval. The user can approve by adding the path (or a covering`, - `glob) to agent-scope/tasks/${task.id}.json under 'allowed' or 'exemptions',`, - `or by switching tasks.` + `If this change is needed for current work, STOP and ask the user. The agent`, + `extends scope by editing the relevant DKG task: call`, + `\`dkg_add_task\` (with status:"in_progress" and a covering glob in scopedToPath)`, + `for a new piece of work, or — if the user agrees — re-file the existing`, + `task with an extended scope. Do NOT improvise around denials.`, ); return lines.join('\n'); } +// --------------------------------------------------------------------------- +// Convenience entry-point used by the standalone status CLI / pre-commit // --------------------------------------------------------------------------- -export function checkPathFromAnywhere(p, opts = {}) { +export async function checkPathFromAnywhere(p, opts = {}) { const root = opts.root || resolveRepoRoot(); - const { id } = opts.taskId ? { id: opts.taskId } : resolveActiveTaskId(root); - const task = id ? loadTask(root, id) : null; + const scope = await resolveActiveScope({ root, force: opts.force }); const rel = normalizeToRepoPath(root, p); - return { root, taskId: id, task, relPath: rel, decision: checkPath(task, rel, root) }; + return { + root, + taskId: scope.id, + task: scope, + relPath: rel, + decision: checkPath(scope, rel, root), + }; } export const __scopeLibFile = fileURLToPath(import.meta.url); + +// Re-export the workspace helpers so legacy callers don't need to learn the +// `dkg-source` module name. +export { loadDkgWorkspaceConfig, describeScope }; diff --git a/agent-scope/lib/scope.test.mjs b/agent-scope/lib/scope.test.mjs index decfd42aa..84ec5f1ef 100644 --- a/agent-scope/lib/scope.test.mjs +++ b/agent-scope/lib/scope.test.mjs @@ -1,89 +1,101 @@ // Unit tests for the scope-check library. Run with: // node --test agent-scope/lib/scope.test.mjs +// +// Focused on the pieces that are pure and don't talk to the DKG daemon: +// glob matching (`checkPath`), protected-path defaults (`checkProtected`, +// `coversProtected`), bootstrap detection, and the back-compat shims +// that hooks call on the cache. End-to-end DKG resolution is covered in +// `dkg-source.test.mjs`. import { test } from 'node:test'; import assert from 'node:assert/strict'; import { - mkdtempSync, writeFileSync, mkdirSync, rmSync, statSync, existsSync, readFileSync, + mkdtempSync, mkdirSync, rmSync, writeFileSync, } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { - checkPath, - checkProtected, - coversProtected, - validateManifest, - normalizeToRepoPath, - loadTask, - resolveActiveTaskId, - listTasks, - explainDeny, - checkNodeVersion, - PROTECTED_PATTERNS, - isBootstrapActive, + checkPath, checkProtected, coversProtected, normalizeToRepoPath, + PROTECTED_PATTERNS, isBootstrapActive, explainDeny, checkNodeVersion, + resolveActiveTaskId, loadTask, } from './scope.mjs'; -import { logDenial, logDecision, MAX_BYTES } from './log.mjs'; function makeRepo() { const root = mkdtempSync(join(tmpdir(), 'agent-scope-test-')); - mkdirSync(join(root, 'agent-scope/tasks'), { recursive: true }); + mkdirSync(join(root, 'agent-scope'), { recursive: true }); return root; } -function writeTask(root, id, body) { - writeFileSync(join(root, 'agent-scope/tasks', `${id}.json`), JSON.stringify(body, null, 2)); +function inProgressTask(allowed = [], exemptions = []) { + return { + id: 'urn:dkg:task:test', + dkgTaskUris: ['urn:dkg:task:test'], + description: 'test', + allowed, + exemptions, + reason: 'ok', + }; } // --- core decision -------------------------------------------------------- -test('checkPath: no task → allow for non-protected path', () => { +test('checkPath: no active scope → allow for non-protected path', () => { assert.equal(checkPath(null, 'any/file.ts'), 'allow'); + assert.equal(checkPath({ reason: 'no-active-task' }, 'any/file.ts'), 'allow'); }); test('checkPath: basic allow', () => { - const t = { id: 't', allowed: ['src/**/*.ts'] }; - assert.equal(checkPath(t, 'src/foo/bar.ts'), 'allow'); + assert.equal(checkPath(inProgressTask(['src/**/*.ts']), 'src/foo/bar.ts'), 'allow'); }); test('checkPath: deny when not matched', () => { - const t = { id: 't', allowed: ['src/**/*.ts'] }; - assert.equal(checkPath(t, 'lib/other.ts'), 'deny'); + assert.equal(checkPath(inProgressTask(['src/**/*.ts']), 'lib/other.ts'), 'deny'); }); test('checkPath: exemption', () => { - const t = { id: 't', allowed: ['src/**/*.ts'], exemptions: ['**/dist/**'] }; + const t = inProgressTask(['src/**/*.ts'], ['**/dist/**']); assert.equal(checkPath(t, 'anything/dist/bundle.js'), 'exempt'); }); -test('checkPath: explicit deny (!) overrides allowed', () => { - const t = { id: 't', allowed: ['src/**', '!src/**/secrets.*'] }; +test('checkPath: explicit ! deny in allowed overrides allow', () => { + const t = inProgressTask(['src/**', '!src/**/secrets.*']); assert.equal(checkPath(t, 'src/config/secrets.ts'), 'deny'); assert.equal(checkPath(t, 'src/config/public.ts'), 'allow'); }); -test('checkPath: explicit deny in exemptions overrides exemption', () => { - const t = { id: 't', allowed: ['src/**'], exemptions: ['**/dist/**', '!**/dist/secret.js'] }; +test('checkPath: explicit ! deny in exemptions overrides exemption', () => { + const t = inProgressTask(['src/**'], ['**/dist/**', '!**/dist/secret.js']); assert.equal(checkPath(t, 'foo/dist/secret.js'), 'deny'); assert.equal(checkPath(t, 'foo/dist/bundle.js'), 'exempt'); }); -test('checkPath: path traversal denied', () => { - const t = { id: 't', allowed: ['**'] }; +test('checkPath: empty / weird inputs', () => { + const t = inProgressTask(['**']); + assert.equal(checkPath(t, ''), 'deny'); assert.equal(checkPath(t, '../etc/passwd'), 'deny'); }); +test('checkPath: protected always wins over scope', () => { + const isolated = makeRepo(); + try { + const t = inProgressTask(['**']); + // Even with a wide-open scope, protected paths still deny. + assert.equal(checkPath(t, '.cursor/hooks/scope-guard.mjs', isolated), 'protected'); + assert.equal(checkPath(t, 'agent-scope/lib/scope.mjs', isolated), 'protected'); + } finally { rmSync(isolated, { recursive: true, force: true }); } +}); + // --- protected paths ------------------------------------------------------ -test('checkProtected: matches a known protected path', () => { - const isolated = makeRepo(); // no bootstrap token +test('checkProtected: matches every protected pattern', () => { + const isolated = makeRepo(); try { assert.equal(checkProtected('.cursor/hooks.json', isolated), 'deny'); assert.equal(checkProtected('.cursor/hooks/scope-guard.mjs', isolated), 'deny'); + assert.equal(checkProtected('.cursor/rules/agent-scope.mdc', isolated), 'deny'); assert.equal(checkProtected('.claude/hooks/scope-guard.mjs', isolated), 'deny'); assert.equal(checkProtected('.claude/settings.json', isolated), 'deny'); assert.equal(checkProtected('agent-scope/lib/scope.mjs', isolated), 'deny'); - assert.equal(checkProtected('agent-scope/tasks/base.json', isolated), 'deny'); - assert.equal(checkProtected('agent-scope/active', isolated), 'deny'); assert.equal(checkProtected('agent-scope/.bootstrap-token', isolated), 'deny'); assert.equal(checkProtected('AGENTS.md', isolated), 'deny'); assert.equal(checkProtected('GEMINI.md', isolated), 'deny'); @@ -96,346 +108,107 @@ test('checkProtected: normal paths pass through', () => { try { assert.equal(checkProtected('packages/core/src/index.ts', isolated), 'allow'); assert.equal(checkProtected('README.md', isolated), 'allow'); + assert.equal(checkProtected('agent-scope/README.md', isolated), 'allow'); + assert.equal(checkProtected('agent-scope/logs/audit.jsonl', isolated), 'allow'); } finally { rmSync(isolated, { recursive: true, force: true }); } }); -test('checkProtected: bootstrap env bypass', () => { - process.env.AGENT_SCOPE_BOOTSTRAP = '1'; - try { - assert.equal(checkProtected('.cursor/hooks.json'), 'allow'); - } finally { delete process.env.AGENT_SCOPE_BOOTSTRAP; } -}); - -test('checkProtected: token file bypass', () => { - const root = makeRepo(); - try { - writeFileSync(join(root, 'agent-scope/.bootstrap-token'), ''); - assert.equal(isBootstrapActive(root), true); - assert.equal(checkProtected('agent-scope/lib/scope.mjs', root), 'allow'); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('checkPath: protected even with active task that would allow it', () => { - const t = { id: 't', allowed: ['**'] }; - const isolated = makeRepo(); - try { - assert.equal(checkPath(t, '.cursor/hooks.json', isolated), 'protected'); - } finally { rmSync(isolated, { recursive: true, force: true }); } -}); - -test('coversProtected: directory that IS a protected tree root', () => { +test('checkProtected: bootstrap token bypasses all', () => { const isolated = makeRepo(); try { - assert.equal(coversProtected('.cursor/hooks', isolated), true); - assert.equal(coversProtected('.cursor/hooks/', isolated), true); - assert.equal(coversProtected('agent-scope/lib', isolated), true); - assert.equal(coversProtected('agent-scope/tasks', isolated), true); + writeFileSync(join(isolated, 'agent-scope/.bootstrap-token'), ''); + assert.ok(isBootstrapActive(isolated)); + assert.equal(checkProtected('.cursor/hooks.json', isolated), 'allow'); + assert.equal(checkProtected('agent-scope/lib/scope.mjs', isolated), 'allow'); } finally { rmSync(isolated, { recursive: true, force: true }); } }); -test('coversProtected: ancestor directory of a protected tree', () => { +test('checkProtected: AGENT_SCOPE_BOOTSTRAP=1 also bypasses', () => { const isolated = makeRepo(); + const prev = process.env.AGENT_SCOPE_BOOTSTRAP; try { - assert.equal(coversProtected('.cursor', isolated), true); // contains hooks/, rules/, hooks.json - assert.equal(coversProtected('agent-scope', isolated), true); // contains lib, bin, ... - } finally { rmSync(isolated, { recursive: true, force: true }); } + process.env.AGENT_SCOPE_BOOTSTRAP = '1'; + assert.ok(isBootstrapActive(isolated)); + assert.equal(checkProtected('.cursor/hooks.json', isolated), 'allow'); + } finally { + if (prev === undefined) delete process.env.AGENT_SCOPE_BOOTSTRAP; + else process.env.AGENT_SCOPE_BOOTSTRAP = prev; + rmSync(isolated, { recursive: true, force: true }); + } }); -test('coversProtected: unrelated directory', () => { +test('coversProtected: detects a tree containing protected files', () => { const isolated = makeRepo(); try { - assert.equal(coversProtected('packages/agent', isolated), false); - assert.equal(coversProtected('README.md', isolated), false); + assert.ok(coversProtected('.cursor', isolated)); + assert.ok(coversProtected('.cursor/hooks', isolated)); + assert.ok(coversProtected('agent-scope/lib', isolated)); + assert.ok(!coversProtected('agent-scope/logs', isolated)); + assert.ok(!coversProtected('packages/core', isolated)); } finally { rmSync(isolated, { recursive: true, force: true }); } }); -test('coversProtected: bootstrap bypasses', () => { - const root = makeRepo(); - try { - writeFileSync(join(root, 'agent-scope/.bootstrap-token'), ''); - assert.equal(coversProtected('.cursor', root), false); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('PROTECTED_PATTERNS: covers all system surfaces', () => { - // Sanity: make sure nothing is forgotten. The guard protects its own live - // surfaces across every supported agent (Cursor hooks + rule, Claude Code - // hooks + settings, the agent-scope library + bin CLI + task manifests + - // active-task pointer + bootstrap token, and the cross-agent rule files). - const required = [ - '.cursor/hooks/**', - '.cursor/hooks.json', - '.cursor/rules/agent-scope.mdc', - '.claude/hooks/**', - '.claude/settings.json', - 'agent-scope/lib/**', - 'agent-scope/bin/**', - 'agent-scope/schema/**', - 'agent-scope/tasks/**', - 'agent-scope/active', - 'agent-scope/.bootstrap-token', - 'AGENTS.md', - 'GEMINI.md', - '.cursorrules', - ]; - for (const p of required) assert.ok(PROTECTED_PATTERNS.includes(p), `missing protection: ${p}`); -}); - -// --- glob ----------------------------------------------------------------- - -test('glob: ** crosses directory separators', () => { - const t = { id: 't', allowed: ['pkg/**/test.ts'] }; - assert.equal(checkPath(t, 'pkg/a/b/c/test.ts'), 'allow'); - assert.equal(checkPath(t, 'pkg/test.ts'), 'allow'); -}); - -test('glob: * does not cross /', () => { - const t = { id: 't', allowed: ['pkg/*/test.ts'] }; - assert.equal(checkPath(t, 'pkg/a/test.ts'), 'allow'); - assert.equal(checkPath(t, 'pkg/a/b/test.ts'), 'deny'); -}); - -test('glob: ? matches one char', () => { - const t = { id: 't', allowed: ['file?.ts'] }; - assert.equal(checkPath(t, 'file1.ts'), 'allow'); - assert.equal(checkPath(t, 'file12.ts'), 'deny'); - assert.equal(checkPath(t, 'file.ts'), 'deny'); -}); - -test('glob: literal dots', () => { - const t = { id: 't', allowed: ['foo.bar.ts'] }; - assert.equal(checkPath(t, 'foo.bar.ts'), 'allow'); - assert.equal(checkPath(t, 'fooxbarxts'), 'deny'); -}); - -// --- path normalization -------------------------------------------------- - -test('normalizeToRepoPath: absolute → relative', () => { - assert.equal(normalizeToRepoPath('/tmp/repo', '/tmp/repo/a/b.ts'), 'a/b.ts'); -}); - -test('normalizeToRepoPath: relative stays relative', () => { - assert.equal(normalizeToRepoPath('/tmp/repo', 'a/b.ts'), 'a/b.ts'); -}); - -// --- manifest validation -------------------------------------------------- - -test('validateManifest: rejects missing id', () => { - const errs = validateManifest({ allowed: ['**'] }); - assert.ok(errs.some(e => /id/.test(e))); -}); - -test('validateManifest: requires allowed OR inherits OR exemptions', () => { - const errs = validateManifest({ id: 'x' }); - assert.ok(errs.some(e => /allowed \/ inherits \/ exemptions/.test(e))); -}); - -test('validateManifest: inherits alone is ok', () => { - const errs = validateManifest({ id: 'x', inherits: ['base'] }); - assert.deepEqual(errs, []); -}); +// --- normalisation -------------------------------------------------------- -test('validateManifest: rejects bad id chars', () => { - const errs = validateManifest({ id: 'Bad Id!', allowed: ['**'] }); - assert.ok(errs.some(e => /id/.test(e))); +test('normalizeToRepoPath: handles absolute and relative inputs', () => { + const root = '/tmp/repo'; + assert.equal(normalizeToRepoPath(root, '/tmp/repo/a/b/c.ts'), 'a/b/c.ts'); + assert.equal(normalizeToRepoPath(root, 'a/b/c.ts'), 'a/b/c.ts'); + assert.equal(normalizeToRepoPath(root, '/tmp/repo/'), ''); }); -test('validateManifest: filename mismatch', () => { - const errs = validateManifest({ id: 'foo', allowed: ['**'] }, 'bar'); - assert.ok(errs.some(e => /filename/.test(e))); -}); - -test('validateManifest: rejects unknown fields', () => { - const errs = validateManifest({ id: 'x', allowed: ['**'], secret: 1 }); - assert.ok(errs.some(e => /unknown property/.test(e))); -}); +// --- back-compat shims ---------------------------------------------------- -test('validateManifest: rejects bad inherits', () => { - const errs = validateManifest({ id: 'x', allowed: ['**'], inherits: ['Bad Id!'] }); - assert.ok(errs.some(e => /inherits/.test(e))); -}); - -test('validateManifest: accepts full valid doc', () => { - const errs = validateManifest({ - id: 'sync', - description: 'refactor sync', - owner: 'bojan', - inherits: ['base'], - allowed: ['src/**/*.ts'], - exemptions: ['**/dist/**'], - notes: 'watch out for ...', - dkg: { taskUri: 'urn:task:1' }, - }); - assert.deepEqual(errs, []); -}); - -// --- manifest loading + inheritance -------------------------------------- - -test('loadTask: returns parsed manifest', () => { - const root = makeRepo(); - try { - writeTask(root, 'x', { id: 'x', allowed: ['**/*.ts'] }); - const t = loadTask(root, 'x'); - assert.equal(t.id, 'x'); - assert.deepEqual(t.allowed, ['**/*.ts']); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('loadTask: throws on corrupt JSON', () => { - const root = makeRepo(); - try { - writeFileSync(join(root, 'agent-scope/tasks/x.json'), 'not json'); - assert.throws(() => loadTask(root, 'x'), /JSON/); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('loadTask: throws on schema violation', () => { - const root = makeRepo(); - try { - writeTask(root, 'x', { id: 'x' }); - assert.throws(() => loadTask(root, 'x'), /allowed/); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('loadTask: merges allowed + exemptions from inherits', () => { - const root = makeRepo(); - try { - writeTask(root, 'base', { id: 'base', allowed: [], exemptions: ['**/dist/**'] }); - writeTask(root, 'child', { - id: 'child', inherits: ['base'], allowed: ['src/**'], exemptions: ['pnpm-lock.yaml'] - }); - const t = loadTask(root, 'child'); - assert.deepEqual(t.allowed, ['src/**']); - assert.deepEqual(t.exemptions.sort(), ['**/dist/**', 'pnpm-lock.yaml'].sort()); - assert.deepEqual(t.__inheritedFrom, ['base']); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('loadTask: inheritance cycle detected', () => { - const root = makeRepo(); - try { - writeTask(root, 'a', { id: 'a', inherits: ['b'], allowed: ['x'] }); - writeTask(root, 'b', { id: 'b', inherits: ['a'], allowed: ['y'] }); - assert.throws(() => loadTask(root, 'a'), /cycle/); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('loadTask: child deny overrides parent allow', () => { - const root = makeRepo(); - try { - writeTask(root, 'parent', { id: 'parent', allowed: ['src/**'] }); - writeTask(root, 'child', { id: 'child', inherits: ['parent'], allowed: ['!src/secrets.ts'] }); - const t = loadTask(root, 'child'); - assert.equal(checkPath(t, 'src/foo.ts'), 'allow'); - assert.equal(checkPath(t, 'src/secrets.ts'), 'deny'); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -// --- active task resolution ----------------------------------------------- - -test('resolveActiveTaskId: env beats file', () => { - const root = makeRepo(); +test('resolveActiveTaskId: with no DKG / no cache returns null id', () => { + const isolated = makeRepo(); try { - writeFileSync(join(root, 'agent-scope/active'), 'from-file\n'); - process.env.AGENT_SCOPE_TASK = 'from-env'; - const r = resolveActiveTaskId(root, { noBranch: true, noGitConfig: true }); - assert.equal(r.id, 'from-env'); - assert.equal(r.source, 'env'); + process.env.AGENT_SCOPE_ROOT = isolated; + const r = resolveActiveTaskId(isolated); + assert.equal(r.id, null); + assert.ok(r.scope); + assert.notEqual(r.scope.reason, 'ok'); } finally { - delete process.env.AGENT_SCOPE_TASK; - rmSync(root, { recursive: true, force: true }); + delete process.env.AGENT_SCOPE_ROOT; + rmSync(isolated, { recursive: true, force: true }); } }); -test('resolveActiveTaskId: file when env missing', () => { - const root = makeRepo(); - try { - writeFileSync(join(root, 'agent-scope/active'), 'from-file\n'); - delete process.env.AGENT_SCOPE_TASK; - const r = resolveActiveTaskId(root, { noBranch: true, noGitConfig: true }); - assert.equal(r.id, 'from-file'); - assert.equal(r.source, 'file'); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('resolveActiveTaskId: none when nothing set', () => { - const root = makeRepo(); - try { - delete process.env.AGENT_SCOPE_TASK; - const r = resolveActiveTaskId(root, { noBranch: true, noGitConfig: true }); - assert.equal(r.id, null); - } finally { rmSync(root, { recursive: true, force: true }); } -}); - -test('listTasks: returns sorted ids', () => { - const root = makeRepo(); - try { - writeTask(root, 'beta', { id: 'beta', allowed: ['**'] }); - writeTask(root, 'alpha', { id: 'alpha', allowed: ['**'] }); - assert.deepEqual(listTasks(root), ['alpha', 'beta']); - } finally { rmSync(root, { recursive: true, force: true }); } +test('loadTask: returns the synthetic scope passed in', () => { + const synth = inProgressTask(['src/**']); + assert.equal(loadTask('/x', null, synth), synth); }); -// --- messages ------------------------------------------------------------- - -test('explainDeny: contains task id, path, and allowed patterns', () => { - const t = { id: 'sync', description: 'sync work', allowed: ['pkg/**/sync*'] }; - const msg = explainDeny(t, 'pkg/other/x.ts', 'deny'); - assert.match(msg, /sync/); - assert.match(msg, /pkg\/other\/x\.ts/); - assert.match(msg, /pkg\/\*\*\/sync\*/); -}); +// --- explainDeny ---------------------------------------------------------- -test('explainDeny: protected path message mentions bootstrap', () => { +test('explainDeny: protected message references PROTECTED_PATTERNS + bootstrap', () => { const msg = explainDeny(null, '.cursor/hooks.json', 'protected'); assert.match(msg, /PROTECTED PATH/); assert.match(msg, /bootstrap/i); + for (const p of PROTECTED_PATTERNS) { + assert.ok(msg.includes(p), `expected ${p} in message`); + } }); -// --- node version --------------------------------------------------------- - -test('checkNodeVersion: passes for current Node', () => { - checkNodeVersion(16); -}); - -test('checkNodeVersion: throws for impossibly high version', () => { - assert.throws(() => checkNodeVersion(999)); +test('explainDeny: out-of-scope message references DKG workflow', () => { + const t = { + ...inProgressTask(['src/**']), + id: 'urn:dkg:task:demo', + dkgTaskUris: ['urn:dkg:task:demo'], + description: 'demo task', + tasks: [{ uri: 'urn:dkg:task:demo', title: 'demo' }], + }; + const msg = explainDeny(t, 'lib/other.ts', 'deny'); + assert.match(msg, /OUT OF TASK SCOPE/); + assert.match(msg, /dkg_add_task/); + assert.match(msg, /urn:dkg:task:demo/); }); -// --- logging rotation ----------------------------------------------------- - -test('log: rotates jsonl when file exceeds MAX_BYTES', () => { - const root = makeRepo(); - try { - const logsDir = join(root, 'agent-scope/logs'); - mkdirSync(logsDir, { recursive: true }); - const file = join(logsDir, 'denials.jsonl'); - // Pre-fill the log with ~MAX_BYTES of content so the next write triggers rotate. - writeFileSync(file, 'x'.repeat(MAX_BYTES + 1024)); - logDenial(root, { event: 'test', path: 'a/b.ts', task: 'x' }); - // After rotation, denials.jsonl should exist and be small again. - const after = statSync(file); - assert.ok(after.size < 1024, `expected rotated file to be small, got ${after.size}`); - } finally { rmSync(root, { recursive: true, force: true }); } -}); +// --- node version --------------------------------------------------------- -test('log: writes jsonl with timestamp + fields', () => { - const root = makeRepo(); - try { - logDenial(root, { event: 'test', path: 'a/b.ts' }); - const content = readFileSync(join(root, 'agent-scope/logs/denials.jsonl'), 'utf8'); - const rec = JSON.parse(content.trim()); - assert.ok(rec.ts); - assert.equal(rec.event, 'test'); - assert.equal(rec.path, 'a/b.ts'); - } finally { rmSync(root, { recursive: true, force: true }); } +test('checkNodeVersion: passes on current process node', () => { + assert.doesNotThrow(() => checkNodeVersion()); }); -test('log: logDecision separate file', () => { - const root = makeRepo(); - try { - logDecision(root, { event: 'ok', path: 'a.ts' }); - assert.ok(existsSync(join(root, 'agent-scope/logs/decisions.jsonl'))); - assert.ok(!existsSync(join(root, 'agent-scope/logs/denials.jsonl'))); - } finally { rmSync(root, { recursive: true, force: true }); } +test('checkNodeVersion: fails when minMajor > current', () => { + assert.throws(() => checkNodeVersion(999), /Node 999\+/); }); diff --git a/agent-scope/lib/shell-parse.mjs b/agent-scope/lib/shell-parse.mjs index bee5e9300..ff6b19846 100644 --- a/agent-scope/lib/shell-parse.mjs +++ b/agent-scope/lib/shell-parse.mjs @@ -192,106 +192,3 @@ export function bodyTouchesProtected(body, protectedPatterns) { const literals = literalsFromProtected(protectedPatterns); return literals.some(lit => lit && body.includes(lit)); } - -// --------------------------------------------------------------------------- -// Approved-task-create detector -// --------------------------------------------------------------------------- -// -// The afterShell hook normally reverts / deletes any writes to protected -// paths (including `agent-scope/tasks/**`). That's the right default — we -// don't want the agent to silently mint itself a wider task scope. -// -// BUT: the onboarding protocol ends with a plan-mode `AskQuestion` -// where the USER explicitly approves the proposed scope. Post-approval, -// the agent should be able to run `pnpm task create ...` directly -// without bouncing the command back to the user to copy-paste. -// -// This function extracts the task id from the command IFF the command -// invokes the canonical task-create CLI. The hook uses that id to -// allowlist only two specific files: -// -// - agent-scope/tasks/.json (the manifest the CLI just wrote) -// - agent-scope/active (set when --activate was passed) -// -// Everything else in `agent-scope/**` (hooks, library, bin, other tasks) -// stays hard-protected. Writes to ANY other path still get reverted. -// -// Supported invocations: -// pnpm task create [flags] -// pnpm run task create [flags] -// node agent-scope/bin/task.mjs create [flags] -// node ./agent-scope/bin/task.mjs create [flags] -// -// Returns: the task id string, or null if the command is not a valid -// task-create invocation. Task id validation matches the JSON schema -// (kebab-case, alphanumerics + hyphens/underscores, 1-64 chars). - -const TASK_ID_RE = /^[a-zA-Z0-9][a-zA-Z0-9_-]{0,63}$/; - -function looksLikePnpm(t) { - if (!t) return false; - const tail = t.split('/').pop(); - return tail === 'pnpm'; -} - -function looksLikeNode(t) { - if (!t) return false; - const tail = t.split('/').pop(); - return tail === 'node' || tail === 'node.exe'; -} - -function looksLikeTaskJs(t) { - if (!t) return false; - const norm = t.replace(/^\.\//, ''); - return ( - norm === 'agent-scope/bin/task.mjs' || - norm.endsWith('/agent-scope/bin/task.mjs') - ); -} - -// Extract the id from a single tokenized subcommand, or null. -function extractIdFromTokens(tokens) { - if (!tokens?.length) return null; - - if (looksLikePnpm(tokens[0])) { - let i = 1; - if (tokens[i] === 'run') i++; - if (tokens[i] !== 'task') return null; - if (tokens[i + 1] !== 'create') return null; - const id = tokens[i + 2]; - return id && TASK_ID_RE.test(id) ? id : null; - } - - if (looksLikeNode(tokens[0])) { - if (!looksLikeTaskJs(tokens[1])) return null; - if (tokens[2] !== 'create') return null; - const id = tokens[3]; - return id && TASK_ID_RE.test(id) ? id : null; - } - - return null; -} - -// Scan the full command (which may contain multiple sub-commands joined -// with `&&` / `||` / `;` / `|`) and return the FIRST approved task-create -// id we find, or null. -export function extractTaskCreateId(command) { - if (typeof command !== 'string' || !command.trim()) return null; - for (const sub of splitCommands(command)) { - const tokens = tokenize(sub); - const id = extractIdFromTokens(tokens); - if (id) return id; - } - return null; -} - -// Given a task id, return the POSIX relative paths the afterShell hook -// should allow past protection. Always includes the manifest and the -// `active` file. Stable sort / lowercase for comparison callers. -export function approvedTaskCreateWrites(id) { - if (!id || !TASK_ID_RE.test(id)) return new Set(); - return new Set([ - `agent-scope/tasks/${id}.json`, - 'agent-scope/active', - ]); -} diff --git a/agent-scope/lib/shell-parse.test.mjs b/agent-scope/lib/shell-parse.test.mjs index 73f686f41..d79390970 100644 --- a/agent-scope/lib/shell-parse.test.mjs +++ b/agent-scope/lib/shell-parse.test.mjs @@ -7,7 +7,6 @@ import { splitCommands, tokenize, extractRedirections, extractDestructiveTargets, extractFindTargets, extractXargsTarget, extractNestedShellBody, extractOpaqueBody, bodyHasWriteIntent, bodyTouchesProtected, - extractTaskCreateId, approvedTaskCreateWrites, } from './shell-parse.mjs'; import { PROTECTED_PATTERNS } from './scope.mjs'; @@ -231,8 +230,8 @@ test('bodyTouchesProtected: agent-scope/lib/scope.mjs', () => { assert.ok(bodyTouchesProtected("open('agent-scope/lib/scope.mjs', 'w')", PROTECTED_PATTERNS)); }); -test('bodyTouchesProtected: agent-scope/active', () => { - assert.ok(bodyTouchesProtected("fs.writeFileSync('agent-scope/active', 'evil')", PROTECTED_PATTERNS)); +test('bodyTouchesProtected: agent-scope/.bootstrap-token', () => { + assert.ok(bodyTouchesProtected("fs.writeFileSync('agent-scope/.bootstrap-token', 'evil')", PROTECTED_PATTERNS)); }); test('bodyTouchesProtected: normal path does not match', () => { @@ -242,7 +241,7 @@ test('bodyTouchesProtected: normal path does not match', () => { // --- composite scenarios (the gap we're closing) -------------------------- test('scenario: node -e + fs.writeFileSync + protected path is flagged', () => { - const cmd = "node -e \"require('fs').writeFileSync('agent-scope/active', 'evil')\""; + const cmd = "node -e \"require('fs').writeFileSync('agent-scope/lib/scope.mjs', 'evil')\""; const tokens = tokenize(cmd); const opaque = extractOpaqueBody(tokens); assert.ok(opaque); @@ -279,135 +278,10 @@ test('scenario: benign node command (read-only) is not flagged', () => { assert.equal(bodyHasWriteIntent(opaque.body), false); }); -// --- extractTaskCreateId + approvedTaskCreateWrites ---------------------- -// The afterShell hook uses these to allow the ONE specific write that a -// `pnpm task create ` invocation legitimately produces. Everything -// else under agent-scope/** still gets reverted. - -test('extractTaskCreateId: pnpm task create returns the id', () => { - assert.equal( - extractTaskCreateId('pnpm task create peer-sync-auth --inherits base --activate'), - 'peer-sync-auth', - ); -}); - -test('extractTaskCreateId: pnpm run task create returns the id', () => { - assert.equal( - extractTaskCreateId('pnpm run task create my-id'), - 'my-id', - ); -}); - -test('extractTaskCreateId: node agent-scope/bin/task.mjs create ', () => { - assert.equal( - extractTaskCreateId('node agent-scope/bin/task.mjs create my-id --activate'), - 'my-id', - ); -}); - -test('extractTaskCreateId: node ./agent-scope/bin/task.mjs create ', () => { - assert.equal( - extractTaskCreateId('node ./agent-scope/bin/task.mjs create my-id'), - 'my-id', - ); -}); - -test('extractTaskCreateId: absolute node path works', () => { - assert.equal( - extractTaskCreateId('/usr/local/bin/node agent-scope/bin/task.mjs create my-id'), - 'my-id', - ); -}); - -test('extractTaskCreateId: absolute pnpm path works', () => { - assert.equal( - extractTaskCreateId('/opt/homebrew/bin/pnpm task create my-id --activate'), - 'my-id', - ); -}); - -test('extractTaskCreateId: quoted id is accepted', () => { - assert.equal( - extractTaskCreateId('pnpm task create "my-id" --description "x"'), - 'my-id', - ); -}); - -test('extractTaskCreateId: chained command — picks up the create sub-command', () => { - assert.equal( - extractTaskCreateId('git add . && pnpm task create my-id --activate'), - 'my-id', - ); - assert.equal( - extractTaskCreateId('pnpm task create my-id --activate && pnpm task show'), - 'my-id', - ); -}); - -test('extractTaskCreateId: pnpm task list / set / clear / show → null', () => { - assert.equal(extractTaskCreateId('pnpm task list'), null); - assert.equal(extractTaskCreateId('pnpm task set my-id'), null); - assert.equal(extractTaskCreateId('pnpm task clear'), null); - assert.equal(extractTaskCreateId('pnpm task show'), null); -}); - -test('extractTaskCreateId: missing id → null', () => { - assert.equal(extractTaskCreateId('pnpm task create'), null); - assert.equal(extractTaskCreateId('pnpm task create --activate'), null); -}); - -test('extractTaskCreateId: id starts with hyphen (flag-like) → null', () => { - assert.equal(extractTaskCreateId('pnpm task create --my-id'), null); - assert.equal(extractTaskCreateId('pnpm task create -foo'), null); -}); - -test('extractTaskCreateId: id with path-escape / special chars → null', () => { - assert.equal(extractTaskCreateId('pnpm task create ../evil'), null); - assert.equal(extractTaskCreateId('pnpm task create my/id'), null); - assert.equal(extractTaskCreateId('pnpm task create "my id"'), null); - assert.equal(extractTaskCreateId('pnpm task create my.id'), null); - assert.equal(extractTaskCreateId('pnpm task create $(whoami)'), null); -}); - -test('extractTaskCreateId: impostor that writes to tasks/ directly → null', () => { - assert.equal( - extractTaskCreateId('echo \'{"id":"evil"}\' > agent-scope/tasks/evil.json'), - null, - ); - assert.equal( - extractTaskCreateId('cp /tmp/m.json agent-scope/tasks/evil.json'), - null, - ); - assert.equal( - extractTaskCreateId("node -e \"require('fs').writeFileSync('agent-scope/tasks/evil.json', '{}')\""), - null, - ); -}); - -test('extractTaskCreateId: non-canonical scripts → null (forces bootstrap)', () => { - assert.equal(extractTaskCreateId('npm run task create my-id'), null); - assert.equal(extractTaskCreateId('yarn task create my-id'), null); - assert.equal(extractTaskCreateId('bun task create my-id'), null); -}); - -test('extractTaskCreateId: non-string / empty → null', () => { - assert.equal(extractTaskCreateId(''), null); - assert.equal(extractTaskCreateId(null), null); - assert.equal(extractTaskCreateId(undefined), null); - assert.equal(extractTaskCreateId(42), null); - assert.equal(extractTaskCreateId(' '), null); -}); - -test('approvedTaskCreateWrites: returns manifest + active paths for valid id', () => { - const s = approvedTaskCreateWrites('peer-sync-auth'); - assert.equal(s.size, 2); - assert.ok(s.has('agent-scope/tasks/peer-sync-auth.json')); - assert.ok(s.has('agent-scope/active')); -}); - -test('approvedTaskCreateWrites: invalid / empty id → empty set', () => { - assert.equal(approvedTaskCreateWrites(null).size, 0); - assert.equal(approvedTaskCreateWrites('').size, 0); - assert.equal(approvedTaskCreateWrites('../evil').size, 0); - assert.equal(approvedTaskCreateWrites('a/b').size, 0); -}); +// Note: the legacy `extractTaskCreateId` / `approvedTaskCreateWrites` +// helpers (and their tests) used to live here. They allowed the +// afterShell hook to whitelist the ONE write a `pnpm task create ` +// invocation produced into `agent-scope/tasks/`. Both the helpers and +// the file-based task flow are gone — scope now lives entirely in the +// DKG (`tasks:Task` + `tasks:scopedToPath`) and there is no longer a +// shell command that legitimately writes inside `agent-scope/`. diff --git a/agent-scope/schema/task.schema.json b/agent-scope/schema/task.schema.json deleted file mode 100644 index 75d83c757..000000000 --- a/agent-scope/schema/task.schema.json +++ /dev/null @@ -1,48 +0,0 @@ -{ - "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://dkg.io/agent-scope/task.schema.json", - "title": "agent-scope Task Manifest", - "description": "Declares what files a task may modify. See agent-scope/README.md.", - "type": "object", - "required": ["id"], - "additionalProperties": false, - "anyOf": [ - { "required": ["allowed"] }, - { "required": ["inherits"] }, - { "required": ["exemptions"] } - ], - "properties": { - "id": { - "type": "string", - "pattern": "^[a-z0-9][a-z0-9-_.]{0,63}$", - "description": "Task identifier. Must match the filename (without .json)." - }, - "description": { "type": "string" }, - "owner": { "type": "string" }, - "created": { "type": "string", "format": "date-time" }, - "inherits": { - "type": "array", - "items": { "type": "string", "pattern": "^[a-z0-9][a-z0-9-_.]{0,63}$" }, - "description": "Task ids to inherit `allowed` and `exemptions` from (merged, parents first)." - }, - "allowed": { - "type": "array", - "items": { "type": "string", "minLength": 1 }, - "description": "Glob patterns this task may write to. Supports *, **, ?. Prefix with ! to negate." - }, - "exemptions": { - "type": "array", - "items": { "type": "string", "minLength": 1 }, - "description": "Patterns always allowed (build artifacts, lockfiles, generated files)." - }, - "notes": { "type": "string" }, - "dkg": { - "type": "object", - "additionalProperties": false, - "properties": { - "taskUri": { "type": "string" }, - "sessionUri": { "type": "string" } - } - } - } -} diff --git a/agent-scope/tasks/base.json b/agent-scope/tasks/base.json deleted file mode 100644 index 21eb5972e..000000000 --- a/agent-scope/tasks/base.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "id": "base", - "description": "Shared exemptions for most tasks — extend via `inherits: [\"base\"]`.", - "owner": "agent-scope", - "allowed": [], - "exemptions": [ - "**/dist/**", - "**/build/**", - "**/*.tsbuildinfo", - "**/node_modules/**", - "pnpm-lock.yaml", - "package-lock.json", - "yarn.lock", - "**/coverage/**", - "**/.turbo/**", - "**/.next/**", - "**/.vite/**" - ], - "notes": "This base task has no `allowed` patterns of its own. Child tasks must provide their own `allowed`. The base only contributes build-artifact / lockfile exemptions that apply to most tasks." -} diff --git a/package.json b/package.json index d0f28c3ce..895a60b8f 100644 --- a/package.json +++ b/package.json @@ -26,11 +26,8 @@ "test:evm": "./scripts/test-evm-integration.sh all", "test:e2e:ui": "pnpm --filter @origintrail-official/dkg-node-ui test:e2e", "test:all": "pnpm test && pnpm test:evm", - "task": "node agent-scope/bin/task.mjs", - "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/onboarding.test.mjs agent-scope/lib/check-agent.test.mjs agent-scope/lib/prompter.test.mjs", - "scope:validate": "node agent-scope/bin/task.mjs validate", - "scope:status": "node agent-scope/bin/task.mjs resolve && echo && node agent-scope/bin/task.mjs show", - "scope:check-agent": "node agent-scope/bin/task.mjs check-agent" + "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/dkg-source.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/check-agent.test.mjs", + "scope:check-agent": "node agent-scope/lib/check-agent.mjs" }, "devDependencies": { "@types/node": "^22", diff --git a/packages/mcp-dkg/hooks/capture-chat.mjs b/packages/mcp-dkg/hooks/capture-chat.mjs index 382d77779..926ed1fa1 100755 --- a/packages/mcp-dkg/hooks/capture-chat.mjs +++ b/packages/mcp-dkg/hooks/capture-chat.mjs @@ -1086,6 +1086,7 @@ When calling \`dkg_annotate_turn\`, ALWAYS pass \`forSession: "${sessionKey}"\`. - \`chat:proposes\` (URI) — ideas/decisions/tasks put forward - \`chat:concludes\` (URI) — Findings worth preserving - \`chat:asks\` (URI) — open Questions +- \`chat:worksOn\` (URI) — the \`tasks:Task\` this turn is working on (emit while a task is \`in_progress\`) Call \`dkg_get_ontology\` for the full agent guide + formal Turtle (one-time per session). diff --git a/packages/mcp-dkg/src/tools/annotations.ts b/packages/mcp-dkg/src/tools/annotations.ts index 6285efc63..b71ace433 100644 --- a/packages/mcp-dkg/src/tools/annotations.ts +++ b/packages/mcp-dkg/src/tools/annotations.ts @@ -275,6 +275,7 @@ ${ttl || '# (missing — re-run import-ontology.mjs)'} examines: z.array(z.string()).optional().describe('chat:examines URIs — entities the turn analysed in detail (vs just citing).'), concludes: z.array(z.string()).optional().describe('chat:concludes URIs — Findings the turn produced. Bare strings minted as urn:dkg:finding:.'), asks: z.array(z.string()).optional().describe('chat:asks URIs — Questions the turn left open. Bare strings minted as urn:dkg:question:.'), + worksOn: z.string().optional().describe('chat:worksOn URI — the tasks:Task this turn is working on. Emit on every substantive turn while a task is in_progress so retrospective queries like "what did agent X discuss while working on task Y" resolve to a single SPARQL. Pure observability — does NOT change the agent-scope guard\'s scope (that derives from tasks:scopedToPath on the task itself). Pass the full task URI (e.g. `urn:dkg:task:refactor-...`); bare strings are wrapped as urn:dkg:task:.'), proposedDecisions: z.array(z.object({ title: z.string(), context: z.string(), @@ -359,6 +360,11 @@ ${ttl || '# (missing — re-run import-ontology.mjs)'} if (!eUri) { skippedEmptyLabels.push(e); continue; } emit(triples, U(turnUri), U(NS.chat + 'examines'), U(eUri)); } + if (args.worksOn) { + const wUri = toUri(args.worksOn, 'task'); + if (wUri) emit(triples, U(turnUri), U(NS.chat + 'worksOn'), U(wUri)); + else skippedEmptyLabels.push(args.worksOn); + } // Findings — referenced via chat:concludes; minted as :Finding entities for (const f of args.concludes ?? []) { @@ -474,7 +480,7 @@ ${ttl || '# (missing — re-run import-ontology.mjs)'} if (triples.length === 0) { return errResult( - 'Empty annotation. Pass at least one of: topics, mentions, examines, concludes, asks, proposedDecisions, proposedTasks, comments, vmPublishRequests.', + 'Empty annotation. Pass at least one of: topics, mentions, examines, concludes, asks, worksOn, proposedDecisions, proposedTasks, comments, vmPublishRequests.', ); } @@ -574,6 +580,7 @@ function buildSummary( examines: args.examines?.length ?? 0, concludes: args.concludes?.length ?? 0, asks: args.asks?.length ?? 0, + worksOn: args.worksOn ? 1 : 0, proposedDecisions: args.proposedDecisions?.length ?? 0, proposedTasks: args.proposedTasks?.length ?? 0, comments: args.comments?.length ?? 0, diff --git a/packages/mcp-dkg/src/tools/writes.ts b/packages/mcp-dkg/src/tools/writes.ts index 094bb533e..f04ef7841 100644 --- a/packages/mcp-dkg/src/tools/writes.ts +++ b/packages/mcp-dkg/src/tools/writes.ts @@ -70,6 +70,7 @@ const LabelP = NS.rdfs + 'label'; const NameP = NS.schema + 'name'; const TitleP = NS.dcterms + 'title'; const CreatedP = NS.dcterms + 'created'; +const ModifiedP = NS.dcterms + 'modified'; const AttrP = NS.prov + 'wasAttributedTo'; const XSD_INT = 'http://www.w3.org/2001/XMLSchema#integer'; @@ -237,7 +238,13 @@ export function registerWriteTools( title: 'Add Task', description: 'Author a `tasks:Task` and auto-promote to SWM. Use when the agent ' + - 'wants to file follow-up work detected during a chat (e.g. "revisit ' + + 'wants to label a piece of work in the project graph — both for follow-up ' + + 'tracking AND, when status is `in_progress`, to declare the operational ' + + 'scope the agent-scope write-time guard will allow (via `scopedToPath`). ' + + 'The guard takes the union of `scopedToPath` globs across all `in_progress` ' + + 'tasks attributed to this agent on this project as the live allow-list. ' + + 'When the work is finished, flip status to `done` via `dkg_update_task_status`. ' + + 'Use also when the agent wants to file follow-up work detected during a chat (e.g. "revisit ' + 'SHACL on promote path"). Attribution via prov:wasAttributedTo.', inputSchema: { title: z.string().describe('Imperative, e.g. "Add SHACL validation on /promote endpoint".'), @@ -248,10 +255,17 @@ export function registerWriteTools( dueDate: z.string().optional().describe('ISO date (YYYY-MM-DD).'), relatedDecision: z.array(z.string()).optional().describe('Decision slugs or full URIs.'), touches: z.array(z.string()).optional().describe('File or package URIs that the task edits.'), + scopedToPath: z.array(z.string()).optional().describe( + 'Glob patterns (relative to repo root) this task is allowed to write while in_progress. ' + + 'These are the operational allow-list the agent-scope write-time guard evaluates: ' + + 'when status is "in_progress" and the task is attributed to the running agent, the union ' + + 'of these globs forms that agent\'s scope on this CG. Bang-prefixed patterns ("!**/secrets.*") ' + + 'are explicit denies. Example: ["packages/agent/**", "packages/core/src/sync/**", "!**/secrets.*"].' + ), projectId: z.string().optional(), }, }, - async ({ title, status, priority, assignee, estimate, dueDate, relatedDecision, touches, projectId }): Promise => { + async ({ title, status, priority, assignee, estimate, dueDate, relatedDecision, touches, scopedToPath, projectId }): Promise => { const pid = resolveProject(projectId, config); if (!pid) return projectErr(); if (!config.agentUri) return agentErr(); @@ -270,7 +284,14 @@ export function registerWriteTools( emit(triples, U(id), U(NameP), L(title)); emit(triples, U(id), U(LabelP), L(title)); emit(triples, U(id), U(TitleP), L(title)); - emit(triples, U(id), U(NS.tasks + 'status'), L(st)); + // NB: tasks:status does NOT live on the main task assertion. It lives + // in a dedicated `task-status--` assertion that gets + // discardAssertion'd on every status flip (see below + `dkg_update_task_status`). + // The daemon's main /write endpoint is additive — if we put `tasks:status` + // here, a later "done" flip would coexist with the original "in_progress" + // and the agent-scope guard's SPARQL would see both. Splitting the status + // out into its own discardable assertion gives us replace semantics + // without losing the other fields. emit(triples, U(id), U(NS.tasks + 'priority'), L(pr)); emit(triples, U(id), U(CreatedP), L(nowIso, XSD_DATETIME)); if (typeof estimate === 'number') emit(triples, U(id), U(NS.tasks + 'estimate'), L(estimate, XSD_INT)); @@ -288,9 +309,25 @@ export function registerWriteTools( emit(triples, U(id), U(NS.tasks + 'relatedDecision'), U(decUri)); } for (const t of touches ?? []) emit(triples, U(id), U(NS.tasks + 'touches'), U(t)); + for (const p of scopedToPath ?? []) { + const trimmed = String(p).trim(); + if (!trimmed) continue; + emit(triples, U(id), U(NS.tasks + 'scopedToPath'), L(trimmed)); + } emit(triples, U(id), U(AttrP), U(config.agentUri)); const assertion = `agent-task-${slug}-${rand(4)}`; + // Status lives in its own deterministic assertion so future status + // flips can `discardAssertion` it cleanly. Name is keyed off the + // task URI tail (the slug + fingerprint) so a same-URI re-create + // converges on the same status assertion. + const uriTail = id.replace(/^urn:dkg:task:/, ''); + const statusAssertion = `task-status-${uriTail}`; + const statusTriples: Array<{ subject: string; predicate: string; object: string }> = []; + emit(statusTriples, U(id), U(NS.tasks + 'status'), L(st)); + emit(statusTriples, U(id), U(ModifiedP), L(nowIso, XSD_DATETIME)); + emit(statusTriples, U(id), U(AttrP), U(config.agentUri)); + try { await client.ensureSubGraph(pid, 'tasks'); await client.writeAssertion({ @@ -299,6 +336,25 @@ export function registerWriteTools( subGraphName: 'tasks', triples, }); + // Discard any prior status assertion first (defensive — handles the + // edge case where an agent re-runs `dkg_add_task` against an URI + // that converged with a previously-written task) and write the + // current status fresh. + try { + await client.discardAssertion({ + contextGraphId: pid, + assertionName: statusAssertion, + subGraphName: 'tasks', + }); + } catch { + /* nothing to discard on first write */ + } + await client.writeAssertion({ + contextGraphId: pid, + assertionName: statusAssertion, + subGraphName: 'tasks', + triples: statusTriples, + }); let shared = false; if (config.capture.autoShare) { try { @@ -308,10 +364,16 @@ export function registerWriteTools( subGraphName: 'tasks', entities: [id], }); + await client.promoteAssertion({ + contextGraphId: pid, + assertionName: statusAssertion, + subGraphName: 'tasks', + entities: [id], + }); shared = true; } catch (e) { return ok( - `Task written but promote failed: ${formatError(e)}\n\n- **URI**: \`${id}\`\n- **assertion**: \`${assertion}\`\n- **layer**: WM only`, + `Task written but promote failed: ${formatError(e)}\n\n- **URI**: \`${id}\`\n- **assertion**: \`${assertion}\`\n- **status assertion**: \`${statusAssertion}\`\n- **layer**: WM only`, ); } } @@ -319,8 +381,13 @@ export function registerWriteTools( `✔ Task ${shared ? '**shared** (WM → SWM)' : 'written to WM'}:\n\n` + `- **URI**: \`${id}\`\n` + `- **status**: ${st} · **priority**: ${pr}${assignee ? ` · **assignee**: ${assignee}` : ''}\n` + + (scopedToPath && scopedToPath.length + ? `- **scopedToPath**: ${scopedToPath.length} glob${scopedToPath.length === 1 ? '' : 's'}` + + (st === 'in_progress' ? ' (live in agent-scope allow-list)' : ' (will activate when status is `in_progress`)') + + '\n' + : '') + `- **attributed to**: \`${config.agentUri}\`\n` + - `- **assertion**: \`${assertion}\``, + `- **assertion**: \`${assertion}\` · **status assertion**: \`${statusAssertion}\``, ); } catch (e) { return errResult(`Failed to add task: ${formatError(e)}`); @@ -328,6 +395,119 @@ export function registerWriteTools( }, ); + // ── dkg_update_task_status ─────────────────────────────────── + server.registerTool( + 'dkg_update_task_status', + { + title: 'Update Task Status', + description: + 'Flip an existing `tasks:Task`\'s status (e.g. todo → in_progress → done). ' + + 'Marks the entity with a fresh `dcterms:modified` so the agent-scope ' + + 'guard\'s "most-recent status wins" SPARQL picks up the change ' + + '(the daemon\'s assertion writes are additive, so the previous ' + + '`tasks:status` triple still lives in the graph — the timestamp ' + + 'is what disambiguates). Use this to mark `in_progress` when you ' + + 'start work (which makes the task\'s `scopedToPath` globs the ' + + 'active allow-list) and `done` when you ship — that retracts the ' + + 'scope and frees the agent for the next task.', + inputSchema: { + taskUri: z.string().describe('Full URI of the `tasks:Task` to update (e.g. `urn:dkg:task:refactor-peer-sync-1a2b`).'), + status: z.enum(['todo', 'in_progress', 'blocked', 'done', 'cancelled']), + note: z.string().optional().describe('Optional one-line rationale; surfaces as `rdfs:comment` on the update.'), + projectId: z.string().optional(), + }, + }, + async ({ taskUri, status, note, projectId }): Promise => { + const pid = resolveProject(projectId, config); + if (!pid) return projectErr(); + if (!config.agentUri) return agentErr(); + const nowIso = new Date().toISOString(); + // Status flips replace the dedicated `task-status-` assertion + // (NOT the main task assertion) so the daemon's additive /write + // semantics don't end up with a `tasks:status "in_progress"` triple + // coexisting with a `tasks:status "done"` one. discardAssertion + // wipes the prior status graph; writeAssertion sets the fresh value. + // See the matching pattern in `dkg_add_task`. + const uriTail = taskUri.replace(/^urn:dkg:task:/, '').replace(/[^A-Za-z0-9._-]+/g, '-'); + const statusAssertion = `task-status-${uriTail}`; + const triples: Array<{ subject: string; predicate: string; object: string }> = []; + emit(triples, U(taskUri), U(NS.tasks + 'status'), L(status)); + emit(triples, U(taskUri), U(ModifiedP), L(nowIso, XSD_DATETIME)); + emit(triples, U(taskUri), U(AttrP), U(config.agentUri)); + if (note) emit(triples, U(taskUri), U(NS.rdfs + 'comment'), L(note)); + + // Optional rotating audit log of every flip — additive, never discarded — + // so retrospective queries can still reconstruct status history if needed. + const historyAssertion = `agent-task-status-log-${rand(6)}`; + const historyTriples: Array<{ subject: string; predicate: string; object: string }> = []; + const eventUri = `urn:dkg:task-status-event:${uriTail}-${Date.now()}`; + emit(historyTriples, U(eventUri), U(TypeP), U(NS.tasks + 'StatusEvent')); + emit(historyTriples, U(eventUri), U(NS.tasks + 'aboutTask'), U(taskUri)); + emit(historyTriples, U(eventUri), U(NS.tasks + 'eventStatus'), L(status)); + emit(historyTriples, U(eventUri), U(CreatedP), L(nowIso, XSD_DATETIME)); + emit(historyTriples, U(eventUri), U(AttrP), U(config.agentUri)); + if (note) emit(historyTriples, U(eventUri), U(NS.rdfs + 'comment'), L(note)); + + try { + await client.ensureSubGraph(pid, 'tasks'); + try { + await client.discardAssertion({ + contextGraphId: pid, + assertionName: statusAssertion, + subGraphName: 'tasks', + }); + } catch { + /* discard on a non-existent assertion is a no-op upstream, but some + daemon builds throw — swallow either way; we're about to write fresh */ + } + await client.writeAssertion({ + contextGraphId: pid, + assertionName: statusAssertion, + subGraphName: 'tasks', + triples, + }); + await client.writeAssertion({ + contextGraphId: pid, + assertionName: historyAssertion, + subGraphName: 'tasks', + triples: historyTriples, + }); + if (config.capture.autoShare) { + try { + await client.promoteAssertion({ + contextGraphId: pid, + assertionName: statusAssertion, + subGraphName: 'tasks', + entities: [taskUri], + }); + await client.promoteAssertion({ + contextGraphId: pid, + assertionName: historyAssertion, + subGraphName: 'tasks', + entities: [eventUri], + }); + } catch (e) { + return ok( + `Status written but promote failed: ${formatError(e)}\n\n- **task**: \`${taskUri}\`\n- **status**: ${status}\n- **status assertion**: \`${statusAssertion}\``, + ); + } + } + return ok( + `✔ Task \`${taskUri}\` status set to **${status}**.\n\n` + + `- **modified at**: ${nowIso}\n` + + `- **attributed to**: \`${config.agentUri}\`\n` + + (status === 'in_progress' + ? '\nThis task\'s `scopedToPath` globs are now part of the agent-scope allow-list.' + : status === 'done' || status === 'cancelled' + ? '\nThis task no longer contributes to the agent-scope allow-list.' + : ''), + ); + } catch (e) { + return errResult(`Failed to update task status: ${formatError(e)}`); + } + }, + ); + // ── dkg_comment ────────────────────────────────────────────── server.registerTool( 'dkg_comment', diff --git a/packages/mcp-dkg/templates/ontologies/coding-project/agent-guide.md b/packages/mcp-dkg/templates/ontologies/coding-project/agent-guide.md index fa0f3f4c4..2b46fb19f 100644 --- a/packages/mcp-dkg/templates/ontologies/coding-project/agent-guide.md +++ b/packages/mcp-dkg/templates/ontologies/coding-project/agent-guide.md @@ -31,16 +31,38 @@ The universal predicates apply to ANY project type. Reach for these first; they' | `chat:proposes` (URI) | An idea/decision/task put forward. Often points at a freshly-minted Decision or Task entity created in the same `dkg_annotate_turn` call. | 0..N | | `chat:concludes` (URI) | A `:Finding` entity the turn produced — a claim worth preserving as its own node. | 0..N | | `chat:asks` (URI) | A `:Question` entity the turn left open. Surfaces "what did we never resolve". | 0..N | +| `chat:worksOn` (URI) | The `tasks:Task` this turn is working on. Emit on every substantive turn while a task is `in_progress`. Pure observability — NOT what the agent-scope guard reads (it derives scope from `tasks:scopedToPath` on the task itself). | 0..1 | ## 2. Coding-project-specific entities When the turn discusses architecture or work, also use the project-flavoured tools (which `dkg_annotate_turn` wraps for you): - **Decision** (`decisions:Decision`) — when the turn settled an architectural question. Required fields: `title`, `context`, `outcome`. Optional: `consequences`, `status` (default `proposed`). -- **Task** (`tasks:Task`) — when the turn identified work to do. Required: `title`. Optional: `priority` (p0..p3), `status`, `assignee`, `relatedDecision`, `touches`. +- **Task** (`tasks:Task`) — when the turn identified work to do, OR when the agent is about to start a piece of work and wants the **agent-scope write-time guard** to allow it. Required: `title`. Optional: `priority` (p0..p3), `status`, `assignee`, `relatedDecision`, `touches`, **`scopedToPath`**. See §2.1. - **Comment** (`schema:Comment`) — when the turn made a remark ABOUT an existing entity. Required: `about` (URI), `body`. - **VmPublishRequest** (`dkg:VmPublishRequest`) — when the turn surfaced something worth anchoring on-chain. Required: `entityUri`, `rationale`. The agent NEVER publishes to VM directly; this writes a marker that a human ratifies via the node-ui's VerifyOnDkgButton. +### 2.1 Tasks ARE the agent-scope source of truth + +This repo's agent-scope guard (Cursor/Claude Code/Codex/Gemini) reads its allow-list of writable paths **straight from the DKG**. There is no `agent-scope/tasks/*.json`, no `pnpm task ...` CLI, no local "active task" file. The flow is: + +1. **Plan first.** When the user starts a new piece of work, propose the scope as a single short question ("I'd like to refactor X — scope to `packages/agent/**`, `packages/core/**`, inherit the standard build-artefact exemptions. Sound good?"). Use `AskQuestion` with two options: `go` (recommended) and `custom_instruction` (free text). Don't write any files yet. +2. **On `go`, file the task in the DKG.** Call `dkg_add_task` with: + - `title` — imperative summary of the work + - `status: "in_progress"` — this flips the scope live immediately + - `scopedToPath: ["packages/agent/**", "packages/core/**", "!**/secrets.*"]` — the operational allow-list + - `touches: []` — for human readability / cross-referencing (NOT what the guard evaluates; that's `scopedToPath`) +3. **Label your turns.** On every substantive turn while the task is in progress, include `worksOn: ` in `dkg_annotate_turn` (or emit `chat:worksOn` directly). This is pure observability — it's how a teammate later runs "what did Claude discuss while working on the peer-sync refactor" as a single SPARQL. +4. **Flip status when you ship.** Call `dkg_update_task_status({ taskUri, status: "done" })`. The scope is retracted automatically — the guard now blocks writes to those globs again until a new task goes `in_progress`. + +The guard's SPARQL, for reference: it queries for every `tasks:Task` where the most recent `tasks:status` (by `dcterms:modified`) is `"in_progress"` AND `prov:wasAttributedTo = `, then takes the **union** of their `tasks:scopedToPath` literals as the live allow-list. So: + +- One in-progress task → that task's globs are your scope. +- Multiple in-progress tasks (e.g. you spun up two parallel work items) → union of all their globs. Both scopes apply simultaneously. +- Zero in-progress tasks → nothing the agent writes is in scope (default-deny). The session-start hook will surface this and prompt you to create one. + +**Never improvise around a denial** — if the guard blocks a write you needed to make, the denial menu always offers a free-text "tell me what to do instead" option. Use it. Common resolutions: extend the existing task's `scopedToPath` (file a new in-progress task for the orthogonal scope), or ask the operator to enable bootstrap mode for the protected-path edge cases. + ## 3. URI patterns (memorise these) ``` @@ -102,6 +124,47 @@ dkg_annotate_turn({ }) ``` +### Example A.1 — turn that STARTS a piece of work (and pins the scope) + +User said: *"yes, go ahead with that scope."* (after you proposed `packages/agent/**` + `packages/core/**` for a peer-sync refactor) + +```jsonc +// First, file the task with status: "in_progress" and the scope. +// This single tool call is what makes the agent-scope guard allow the +// upcoming writes — there is no other "activate task" step. +dkg_add_task({ + title: "Refactor peer sync to use the new workspace auth", + status: "in_progress", + priority: "p1", + scopedToPath: [ + "packages/agent/**", + "packages/core/**", + "!**/secrets.*" + ], + touches: [ + "urn:dkg:code:package:%40origintrail-official%2Fdkg-agent", + "urn:dkg:code:package:%40origintrail-official%2Fdkg-core" + ] +}) + +// Then annotate this turn — and every subsequent turn while the task +// is in_progress — with chat:worksOn so the chat trail is queryable. +dkg_annotate_turn({ + topics: ["peer sync", "workspace auth"], + worksOn: "urn:dkg:task:refactor-peer-sync-to-use-the-new-workspace-auth-" +}) +``` + +When the work ships: + +```jsonc +dkg_update_task_status({ + taskUri: "urn:dkg:task:refactor-peer-sync-to-use-the-new-workspace-auth-", + status: "done", + note: "Merged in PR #312." +}) +``` + The Decision and Task get fresh URIs (`urn:dkg:decision:adopt-tree-sitter-for-python-ast-parsing-...`) attributed to you via `prov:wasAttributedTo`, auto-promoted to SWM, instantly visible to teammates' agents. The `chat:proposes` edge from the turn to the Decision is created automatically. ### Example B — turn that just discusses without deciding diff --git a/packages/mcp-dkg/templates/ontologies/coding-project/ontology.ttl b/packages/mcp-dkg/templates/ontologies/coding-project/ontology.ttl index ba9c1242d..d9b6846ce 100644 --- a/packages/mcp-dkg/templates/ontologies/coding-project/ontology.ttl +++ b/packages/mcp-dkg/templates/ontologies/coding-project/ontology.ttl @@ -93,6 +93,13 @@ chat:asks rdfs:domain chat:Turn ; rdfs:range :Question . +chat:worksOn + a owl:ObjectProperty ; + rdfs:label "works on" ; + rdfs:comment "The tasks:Task this chat turn is working on. Pure observability — agents emit this on every substantive turn while a task is in_progress, so retrospective queries like 'what did agent X discuss while working on task Y' resolve to a single SPARQL. NOT used by the write-time guard (that derives scope from tasks:scopedToPath on the task itself)." ; + rdfs:domain chat:Turn ; + rdfs:range tasks:Task . + # ─── Section 2 — Universal annotation entity classes ──────────────── :Concept @@ -209,7 +216,33 @@ tasks:touches rdfs:label "touches" ; rdfs:domain tasks:Task ; rdfs:range owl:Thing ; - rdfs:comment "A code:File or code:Package this task edits." . + rdfs:comment "A code:File or code:Package this task edits. Human-readable record; NOT what the write-time guard evaluates (use tasks:scopedToPath for that)." . + +tasks:scopedToPath + a owl:DatatypeProperty ; + rdfs:label "scoped to path" ; + rdfs:domain tasks:Task ; + rdfs:range xsd:string ; + rdfs:comment "Glob pattern (relative to repo root) the agent IS allowed to write while this task is `in_progress`. Multivalued: emit one triple per glob. Bang-prefixed patterns are explicit denies. Used by the agent-scope write-time guard — the union of these globs across all in_progress tasks attributed to the agent forms the allow-list for that session. Example values: \"packages/agent/**\", \"packages/core/src/sync/**\", \"!**/secrets.*\"." . + +tasks:StatusEvent + a owl:Class ; + rdfs:label "Task Status Event" ; + rdfs:comment "An immutable record of a tasks:Task transitioning into a particular status at a particular time. Written by `dkg_update_task_status` alongside the (replace-semantics) status flip on the task itself, so retrospective queries can reconstruct the task's full state history without polluting the live `tasks:status` triple." . + +tasks:aboutTask + a owl:ObjectProperty ; + rdfs:label "about task" ; + rdfs:domain tasks:StatusEvent ; + rdfs:range tasks:Task ; + rdfs:comment "The task this status event is about." . + +tasks:eventStatus + a owl:DatatypeProperty ; + rdfs:label "event status" ; + rdfs:domain tasks:StatusEvent ; + rdfs:range xsd:string ; + rdfs:comment "The status the task transitioned INTO at this event." . # Code structure — aligned with schema:SoftwareSourceCode + DOAP. From d6c63a86cb65d1aec384c0eb1d737f9d3bd91a47 Mon Sep 17 00:00:00 2001 From: Bojan Date: Tue, 28 Apr 2026 14:29:16 +0200 Subject: [PATCH 20/21] agent-scope: zero-config onboarding for new clones Coworkers pulling this branch previously had to handcraft `.dkg/config.yaml` themselves and could hit Cursor MCP wiring issues if pnpm/tsx weren't on the spawn PATH. With this change, the onboarding flow becomes: pnpm install # postinstall auto-runs scripts/scope-setup.mjs pnpm build # builds packages/mcp-dkg/dist/index.js dkg start # in another terminal # open Cursor and chat normally What's new: - scripts/scope-setup.mjs auto-creates .dkg/config.yaml with team defaults (api=localhost:9200, tokenFile=~/.dkg/auth.token, contextGraph=dev-coordination) and a per-machine agent URI derived as `urn:dkg:agent:cursor-${user}-${hostname}`. If the daemon's up it also creates the dev-coordination paranet via the daemon API. Idempotent, silent on re-run, never fails postinstall when the daemon is down. - package.json exposes `pnpm scope:setup` (manual) and `postinstall` (auto with --auto flag) so the script runs on every fresh install without anyone having to remember it. - .cursor/mcp.json switched from `pnpm exec tsx ` to `node packages/mcp-dkg/dist/index.js`. Cursor spawns MCP servers in a non-interactive environment that often lacks NVM-managed pnpm/tsx on PATH, but `node` almost always resolves; this also cuts ~500ms off MCP startup. - agent-scope/README.md gains an "Onboarding (new clone)" section with the new flow plus a fallback recipe for the rare cases where the project-level Cursor MCP doesn't load. Made-with: Cursor --- .cursor/mcp.json | 6 +- agent-scope/README.md | 50 +++++++- package.json | 4 +- scripts/scope-setup.mjs | 255 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 309 insertions(+), 6 deletions(-) create mode 100755 scripts/scope-setup.mjs diff --git a/.cursor/mcp.json b/.cursor/mcp.json index d472b2353..27bd7b7bc 100644 --- a/.cursor/mcp.json +++ b/.cursor/mcp.json @@ -1,9 +1,9 @@ { - "_comment": "DKG MCP read tools. Invoked via pnpm exec so the workspace-local tsx binary (devDependency of the root package) runs the TypeScript source directly. This avoids the fresh-clone footgun where dist/ is gitignored and the auto-loaded MCP server would 404 before anyone ran `pnpm build`. Config (API, token, project) comes from .dkg/config.yaml in the workspace root. `cwd` is pinned to the workspace folder because Cursor spawns MCP servers from its own CWD, not the workspace — without this, pnpm resolves the wrong workspace and the upward .dkg/config.yaml lookup misses the token file.", + "_comment": "DKG MCP read tools. Spawned by Cursor on workspace open. We invoke the built `dist/index.js` via `node` (rather than `pnpm exec tsx` against the source) for two reasons: (1) Cursor spawns MCP servers in a non-interactive environment that often does NOT have NVM-managed `pnpm`/`tsx` on PATH, but `node` almost always resolves; (2) starting from compiled JS is ~500ms faster than tsx-on-the-fly. Coworkers run `pnpm install && pnpm build` once, after which this auto-wires. `cwd` is pinned to the workspace because Cursor spawns from its own install dir by default, and the MCP server walks upwards from cwd to find `.dkg/config.yaml`.", "mcpServers": { "dkg": { - "command": "pnpm", - "args": ["exec", "tsx", "packages/mcp-dkg/src/index.ts"], + "command": "node", + "args": ["packages/mcp-dkg/dist/index.js"], "cwd": "${workspaceFolder}" } } diff --git a/agent-scope/README.md b/agent-scope/README.md index 2b302474c..736666489 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -58,12 +58,58 @@ written without your OK. `AGENTS.md` / `GEMINI.md` on session start and are expected to follow the rules. Best-effort. -After you clone the repo, run this once to check your agent is wired up: +## Onboarding (new clone) + +The intended flow for a coworker who just cloned the repo: + +```bash +pnpm install # postinstall auto-runs scope-setup +pnpm build # builds packages/mcp-dkg/dist/index.js +dkg start # in another terminal, leave running +# open Cursor → chat normally +``` + +What the postinstall step does for you (`scripts/scope-setup.mjs`): + +1. **Writes `.dkg/config.yaml`** if it doesn't exist, with sensible + defaults (`http://localhost:9200`, `~/.dkg/auth.token`, + `contextGraph: dev-coordination`) and a per-machine agent URI + auto-derived as `urn:dkg:agent:cursor-${user}-${hostname}`. +2. **Creates the `dev-coordination` paranet** on your local DKG daemon + if it isn't there yet (skipped silently if the daemon isn't up — the + next manual `pnpm scope:setup` will pick it up). + +If postinstall hooks make you nervous you can run it explicitly any +time: ```bash -pnpm scope:check-agent +pnpm scope:setup # idempotent, safe to re-run +pnpm scope:check-agent # verify Cursor / Claude Code are wired ``` +### When the project-level MCP doesn't load in Cursor + +Cursor's project `.cursor/mcp.json` is committed and points at the +built `packages/mcp-dkg/dist/index.js`. If after `pnpm build` and a +Cursor restart you still don't see `dkg_*` tools in the chat, fall +back to adding it to your global `~/.cursor/mcp.json` with absolute +paths: + +```json +{ + "mcpServers": { + "dkg": { + "command": "node", + "args": ["/absolute/path/to/dkg/packages/mcp-dkg/dist/index.js"], + "cwd": "/absolute/path/to/dkg" + } + } +} +``` + +This is the safety net for Cursor environments where `node` isn't on +the spawn PATH. + ## Editing agent-scope itself The files that run the guard are permanently off-limits to the agent — diff --git a/package.json b/package.json index 895a60b8f..ba8be8ab3 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,9 @@ "test:e2e:ui": "pnpm --filter @origintrail-official/dkg-node-ui test:e2e", "test:all": "pnpm test && pnpm test:evm", "scope:test": "node --test agent-scope/lib/scope.test.mjs agent-scope/lib/dkg-source.test.mjs agent-scope/lib/shell-parse.test.mjs agent-scope/lib/denial.test.mjs agent-scope/lib/check-agent.test.mjs", - "scope:check-agent": "node agent-scope/lib/check-agent.mjs" + "scope:check-agent": "node agent-scope/lib/check-agent.mjs", + "scope:setup": "node scripts/scope-setup.mjs", + "postinstall": "node scripts/scope-setup.mjs --auto" }, "devDependencies": { "@types/node": "^22", diff --git a/scripts/scope-setup.mjs b/scripts/scope-setup.mjs new file mode 100755 index 000000000..b52529d12 --- /dev/null +++ b/scripts/scope-setup.mjs @@ -0,0 +1,255 @@ +#!/usr/bin/env node +/** + * scope-setup — bootstrap a coworker's local agent-scope wiring. + * + * What it does (in order, each step is independently skippable / idempotent): + * + * 1. Make sure `.dkg/config.yaml` exists. If not, write one with: + * - `node.api: http://localhost:9200` (the local DKG daemon) + * - `node.tokenFile: ~/.dkg/auth.token` (where `dkg start` puts it) + * - `contextGraph: dev-coordination` (team-wide paranet) + * - `agent.uri: urn:dkg:agent:cursor-${user}@${host}` (auto-derived per machine) + * The values come from constants below; the agent URI is derived from + * `process.env.USER` and `os.hostname()` so each coworker / machine has + * a distinct identity without anyone editing YAML by hand. + * + * 2. If the daemon is reachable on `node.api`, make sure the + * `dev-coordination` paranet exists. If it doesn't, create it via the + * daemon's `/api/context-graph/create` endpoint. Daemon down → skip + * with a one-line note (creation will happen next run / on demand). + * + * 3. Print a short summary (what was created / skipped / already-OK) and + * a one-line reminder if the user still needs to start the daemon or + * restart Cursor. + * + * This script is wired into `pnpm postinstall` and `pnpm scope:setup`. The + * postinstall path uses `--auto` which suppresses prompts and exits 0 even + * if the daemon is down — we never want `pnpm install` itself to fail on + * a coworker's machine just because their daemon isn't up yet. + * + * Anything that already exists is left strictly alone — re-running this is + * safe and produces no output beyond `[scope-setup] nothing to do`. + */ + +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +const __filename = fileURLToPath(import.meta.url); +const REPO_ROOT = path.resolve(path.dirname(__filename), '..'); + +const CONTEXT_GRAPH_ID = 'dev-coordination'; +const CONTEXT_GRAPH_NAME = 'Dev Coordination'; +const CONTEXT_GRAPH_DESC = + 'Cross-agent dev task coordination — agent-scope reads `tasks:Task` records here to compute each agent\'s allowed write paths.'; +const DAEMON_API = 'http://localhost:9200'; +const TOKEN_FILE = '~/.dkg/auth.token'; + +// `--auto` = postinstall mode: never prompt, never fail-loud. Plain +// invocation = the user ran `pnpm scope:setup` themselves and probably +// wants to see what happened, so we print more. +const AUTO_MODE = process.argv.includes('--auto'); + +function log(msg) { + process.stdout.write(`[scope-setup] ${msg}\n`); +} + +function warn(msg) { + process.stderr.write(`[scope-setup] ${msg}\n`); +} + +/** + * Build a stable per-machine agent URI. Falls back gracefully when env + * is weird (CI, containers, etc) so we never produce a URI containing + * the literal string "undefined". + */ +function deriveAgentUri() { + const rawUser = process.env.USER || process.env.USERNAME || 'anon'; + const rawHost = os.hostname() || 'machine'; + const slug = (s) => + String(s) + .trim() + .toLowerCase() + .replace(/[^a-z0-9-]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 40) || 'x'; + return `urn:dkg:agent:cursor-${slug(rawUser)}-${slug(rawHost)}`; +} + +function ensureConfigFile() { + const dkgDir = path.join(REPO_ROOT, '.dkg'); + const configPath = path.join(dkgDir, 'config.yaml'); + + if (fs.existsSync(configPath)) { + return { created: false, configPath }; + } + + fs.mkdirSync(dkgDir, { recursive: true }); + + const agentUri = deriveAgentUri(); + const yaml = `# Auto-generated by scripts/scope-setup.mjs on first install. +# Safe to edit by hand — re-running scope-setup will leave it alone. +# This file is gitignored on purpose; it carries your personal agent URI. + +contextGraph: ${CONTEXT_GRAPH_ID} + +node: + api: ${DAEMON_API} + tokenFile: ${TOKEN_FILE} + +agent: + # Auto-derived from \`${process.env.USER || 'anon'}@${os.hostname()}\` so + # each coworker / machine has a distinct identity. Override this if you + # want a different URI (e.g. one per parallel Cursor chat). + uri: ${agentUri} + +capture: + subGraph: chat + assertion: chat-log + privacy: team +`; + + fs.writeFileSync(configPath, yaml, 'utf8'); + return { created: true, configPath, agentUri }; +} + +/** + * Read the daemon token from `~/.dkg/auth.token` (the canonical location + * where `dkg start` writes it). Returns null if the file isn't there yet + * — that just means the daemon hasn't been started on this machine, + * which is a soft failure for setup purposes. + */ +function readDaemonToken() { + const tokenPath = path.join(os.homedir(), '.dkg', 'auth.token'); + try { + const raw = fs.readFileSync(tokenPath, 'utf8'); + const line = raw.split('\n').find((l) => l.trim() && !l.startsWith('#')); + return line ? line.trim() : null; + } catch { + return null; + } +} + +/** + * Probe the daemon. Short timeout because this runs from postinstall and + * we don't want `pnpm install` to hang for 30 seconds on machines where + * the daemon isn't running. + */ +async function probeDaemon() { + try { + const res = await fetch(`${DAEMON_API}/api/agent/identity`, { + signal: AbortSignal.timeout(1500), + }); + return res.ok || res.status === 401; + } catch { + return false; + } +} + +async function ensureContextGraph(token) { + const headers = { + 'content-type': 'application/json', + ...(token ? { authorization: `Bearer ${token}` } : {}), + }; + + let listRes; + try { + listRes = await fetch(`${DAEMON_API}/api/context-graph/list`, { + headers, + signal: AbortSignal.timeout(2000), + }); + } catch (err) { + return { ok: false, reason: `list-failed: ${err.message}` }; + } + + if (listRes.status === 401) { + return { ok: false, reason: 'unauthorized (no token at ~/.dkg/auth.token)' }; + } + if (!listRes.ok) { + return { ok: false, reason: `list http ${listRes.status}` }; + } + + const listJson = await listRes.json().catch(() => ({})); + const exists = (listJson.contextGraphs || []).some( + (g) => g.id === CONTEXT_GRAPH_ID, + ); + if (exists) return { ok: true, alreadyExisted: true }; + + let createRes; + try { + createRes = await fetch(`${DAEMON_API}/api/context-graph/create`, { + method: 'POST', + headers, + body: JSON.stringify({ + id: CONTEXT_GRAPH_ID, + name: CONTEXT_GRAPH_NAME, + description: CONTEXT_GRAPH_DESC, + }), + signal: AbortSignal.timeout(5000), + }); + } catch (err) { + return { ok: false, reason: `create-failed: ${err.message}` }; + } + + if (!createRes.ok) { + const body = await createRes.text().catch(() => ''); + return { + ok: false, + reason: `create http ${createRes.status}: ${body.slice(0, 200)}`, + }; + } + return { ok: true, alreadyExisted: false }; +} + +async function main() { + const cfgResult = ensureConfigFile(); + if (cfgResult.created) { + log(`wrote ${path.relative(REPO_ROOT, cfgResult.configPath)} (agent=${cfgResult.agentUri})`); + } else if (!AUTO_MODE) { + log(`config already exists at ${path.relative(REPO_ROOT, cfgResult.configPath)} — leaving it alone`); + } + + const daemonUp = await probeDaemon(); + if (!daemonUp) { + if (AUTO_MODE) { + // Stay quiet on postinstall — this is the common case and not an error + if (cfgResult.created) { + log('daemon not reachable; start it with `dkg start`, then re-run `pnpm scope:setup` to create the dev-coordination paranet.'); + } + } else { + warn(`daemon not reachable at ${DAEMON_API} — start it with \`dkg start\` and re-run.`); + } + return 0; + } + + const token = readDaemonToken(); + const cgResult = await ensureContextGraph(token); + if (cgResult.ok) { + if (cgResult.alreadyExisted) { + if (!AUTO_MODE) log(`context graph "${CONTEXT_GRAPH_ID}" already exists — nothing to do.`); + } else { + log(`created context graph "${CONTEXT_GRAPH_ID}" on the local daemon.`); + } + } else { + warn(`could not ensure context graph "${CONTEXT_GRAPH_ID}": ${cgResult.reason}`); + warn('you can create it later with: dkg paranet create dev-coordination -n "Dev Coordination"'); + } + + if (cfgResult.created && !AUTO_MODE) { + log('done. Restart Cursor (or any open IDE chats) so the new MCP config picks up.'); + } + return 0; +} + +main() + .then((code) => process.exit(code)) + .catch((err) => { + if (AUTO_MODE) { + // Don't break `pnpm install` if something goes sideways + warn(`postinstall scope-setup failed (non-fatal): ${err?.message || err}`); + process.exit(0); + } + warn(`fatal: ${err?.stack || err}`); + process.exit(1); + }); From b6af2bb14b2ac46b16dfc30d27236ca61cb983af Mon Sep 17 00:00:00 2001 From: Bojan Date: Tue, 28 Apr 2026 15:23:02 +0200 Subject: [PATCH 21/21] mcp-dkg: auto-create the configured paranet on first connect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit got coworkers most of the way to "pull, install, build, start node, chat" — but left one rough edge: the dev-coordination paranet was only created if the daemon happened to be up at install time, which it usually isn't. A coworker would chat normally, the agent would try `dkg_add_task`, the call would 404, and they'd have to remember to run `pnpm scope:setup` manually after `dkg start`. This commit closes that gap by having the MCP server itself ensure the paranet exists on startup, before serving any tools. Specifically: - `DkgClient.createContextGraph` is added, mirroring the v10 `/api/context-graph/create` endpoint with a legacy `/api/paranet/create` fallback so older daemons still work. - `ensureContextGraph()` in the MCP entrypoint runs once per session: list projects, check if the configured `contextGraph` is present, create it if not. Best-effort — daemon down or auth errors are logged and the server continues serving (read tools still work against existing graphs, and the next session will retry). So now every Cursor / Claude Code session naturally "heals" the paranet on first connect. The setup script's "daemon was down at install time" warning is softened accordingly: there's nothing to re-run, the MCP will handle it. README onboarding section trimmed to match. Made-with: Cursor --- agent-scope/README.md | 35 +++++++++++++++++---------- packages/mcp-dkg/src/client.ts | 32 +++++++++++++++++++++++++ packages/mcp-dkg/src/index.ts | 44 +++++++++++++++++++++++++++++++++- scripts/scope-setup.mjs | 10 +++++--- 4 files changed, 104 insertions(+), 17 deletions(-) diff --git a/agent-scope/README.md b/agent-scope/README.md index 736666489..560c67396 100644 --- a/agent-scope/README.md +++ b/agent-scope/README.md @@ -60,30 +60,39 @@ written without your OK. ## Onboarding (new clone) -The intended flow for a coworker who just cloned the repo: +The intended flow for a coworker who just cloned the repo — that's it, +no extra commands: ```bash -pnpm install # postinstall auto-runs scope-setup +pnpm install # postinstall writes .dkg/config.yaml pnpm build # builds packages/mcp-dkg/dist/index.js dkg start # in another terminal, leave running # open Cursor → chat normally ``` -What the postinstall step does for you (`scripts/scope-setup.mjs`): +Two pieces of automation make that work: -1. **Writes `.dkg/config.yaml`** if it doesn't exist, with sensible - defaults (`http://localhost:9200`, `~/.dkg/auth.token`, +1. **`scripts/scope-setup.mjs`** runs from the root `postinstall` and + writes `.dkg/config.yaml` with sensible defaults + (`http://localhost:9200`, `~/.dkg/auth.token`, `contextGraph: dev-coordination`) and a per-machine agent URI - auto-derived as `urn:dkg:agent:cursor-${user}-${hostname}`. -2. **Creates the `dev-coordination` paranet** on your local DKG daemon - if it isn't there yet (skipped silently if the daemon isn't up — the - next manual `pnpm scope:setup` will pick it up). - -If postinstall hooks make you nervous you can run it explicitly any -time: + auto-derived as `urn:dkg:agent:cursor-${user}-${hostname}`. It also + tries to create the `dev-coordination` paranet on the daemon — but + the daemon is usually still down at install time, so this part is + best-effort. +2. **The MCP server itself auto-provisions the paranet on first + connect.** When Cursor/Claude Code spawns + `packages/mcp-dkg/dist/index.js` and your daemon is up, the server + checks whether the configured `contextGraph` exists and, if not, + creates it before serving any tools. So the very first `dkg_*` + tool call from the agent always lands in a live graph — coworkers + never have to run `pnpm scope:setup` manually after starting the + daemon. + +If you want to peek at it manually: ```bash -pnpm scope:setup # idempotent, safe to re-run +pnpm scope:setup # rerun the postinstall step pnpm scope:check-agent # verify Cursor / Claude Code are wired ``` diff --git a/packages/mcp-dkg/src/client.ts b/packages/mcp-dkg/src/client.ts index bdc052d70..1a936fd14 100644 --- a/packages/mcp-dkg/src/client.ts +++ b/packages/mcp-dkg/src/client.ts @@ -130,6 +130,38 @@ export class DkgClient { return r.subGraphs ?? []; } + /** + * Create a context graph (a.k.a. paranet) on the daemon. The MCP server + * uses this at startup to auto-provision the workspace's configured + * `contextGraph` if it isn't there yet — see `ensureContextGraph` in + * `index.ts`. Mirrors the v10 `/api/context-graph/create` endpoint; + * legacy `/api/paranet/create` is tried as a fallback for daemons that + * still ship the older route. + */ + async createContextGraph( + id: string, + name: string, + description?: string, + ): Promise<{ created?: string; uri?: string }> { + const body = { id, name, description }; + try { + return await this.request<{ created?: string; uri?: string }>( + 'POST', + '/api/context-graph/create', + body, + ); + } catch (err) { + if (err instanceof DkgHttpError && err.status === 404) { + return await this.request<{ created?: string; uri?: string }>( + 'POST', + '/api/paranet/create', + body, + ); + } + throw err; + } + } + // ── Query ────────────────────────────────────────────────────── /** * Memory-layer routing is controlled by `view` + `graphSuffix`: diff --git a/packages/mcp-dkg/src/index.ts b/packages/mcp-dkg/src/index.ts index e3fa1d319..533949078 100644 --- a/packages/mcp-dkg/src/index.ts +++ b/packages/mcp-dkg/src/index.ts @@ -9,7 +9,7 @@ */ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; -import { loadConfig, describeConfig } from './config.js'; +import { loadConfig, describeConfig, type DkgConfig } from './config.js'; import { DkgClient } from './client.js'; import { registerReadTools } from './tools.js'; import { registerWriteTools } from './tools/writes.js'; @@ -19,6 +19,46 @@ import { runCli, isKnownCliSubcommand } from './cli/index.js'; const VERSION = '0.1.0'; +/** + * Make sure the configured `contextGraph` exists on the daemon. This runs + * once per MCP session (typically: every Cursor / Claude Code startup). + * + * Why this lives here: a coworker's first install almost always happens + * with the daemon down — the order is `pnpm install` → `pnpm build` → + * `dkg start` → open Cursor. The postinstall `scripts/scope-setup.mjs` + * writes `.dkg/config.yaml` but skips paranet creation (daemon was down). + * Without this, the agent's first `dkg_add_task` call would 404 and + * we'd be back to "user has to run an extra command before the system + * works". Auto-creating here means by the time the first tool call + * lands, the graph is live. + * + * Best-effort. If the daemon is down, unauthorized, or the create fails + * for any reason, we log to stderr and continue serving — read tools + * still work against existing graphs, and the next session will retry. + * Never throws. + */ +async function ensureContextGraph(client: DkgClient, config: DkgConfig): Promise { + if (!config.defaultProject) return; + try { + const projects = await client.listProjects(); + const exists = projects.some((p) => p?.id === config.defaultProject); + if (exists) return; + await client.createContextGraph( + config.defaultProject, + config.defaultProject, + `Auto-created by dkg-mcp on first connect — agent-scope coordination graph.`, + ); + process.stderr.write( + `[dkg-mcp] auto-created context graph "${config.defaultProject}" (first run on this daemon)\n`, + ); + } catch (err) { + const msg = err instanceof Error ? err.message : String(err); + process.stderr.write( + `[dkg-mcp] note: could not ensure context graph "${config.defaultProject}": ${msg}\n`, + ); + } +} + /** * Dual-mode entrypoint. With no args (the way Cursor / Claude Code * spawn an MCP server), boot the stdio MCP server. With a known @@ -37,6 +77,8 @@ async function main(): Promise { process.stderr.write(`[dkg-mcp ${VERSION}] ${describeConfig(config)}\n`); const client = new DkgClient({ config }); + await ensureContextGraph(client, config); + const server = new McpServer({ name: 'dkg', version: VERSION }); registerReadTools(server, client, config); diff --git a/scripts/scope-setup.mjs b/scripts/scope-setup.mjs index b52529d12..9b40926c6 100755 --- a/scripts/scope-setup.mjs +++ b/scripts/scope-setup.mjs @@ -213,12 +213,16 @@ async function main() { const daemonUp = await probeDaemon(); if (!daemonUp) { if (AUTO_MODE) { - // Stay quiet on postinstall — this is the common case and not an error + // Stay quiet on postinstall — this is the common path. The MCP + // server will auto-create the paranet on first connect (see + // ensureContextGraph in packages/mcp-dkg/src/index.ts), so the + // user genuinely doesn't need to re-run anything after starting + // the daemon — they can just open Cursor and chat. if (cfgResult.created) { - log('daemon not reachable; start it with `dkg start`, then re-run `pnpm scope:setup` to create the dev-coordination paranet.'); + log(`daemon not reachable yet — fine, the MCP server will create "${CONTEXT_GRAPH_ID}" on its first connect after \`dkg start\`.`); } } else { - warn(`daemon not reachable at ${DAEMON_API} — start it with \`dkg start\` and re-run.`); + warn(`daemon not reachable at ${DAEMON_API} — start it with \`dkg start\` and re-run, OR just chat in Cursor (the MCP auto-creates "${CONTEXT_GRAPH_ID}" on first connect).`); } return 0; }