diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index e69526d5..2ed95ac4 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -6,13 +6,13 @@ }, "metadata": { "description": "Plan-first workflows for Claude Code and Factory Droid. Two plugins: flow-next (recommended, zero-dep, Ralph autonomous mode) and flow (Beads integration).", - "version": "0.38.3" + "version": "0.39.0" }, "plugins": [ { "name": "flow-next", "description": "Zero-dependency planning + execution with .flow/ task tracking and Ralph autonomous mode (multi-model review gates). Worker subagent per task for context isolation. Includes 21 subagents, 13 commands, 18 skills.", - "version": "0.38.3", + "version": "0.39.0", "author": { "name": "Gordon Mickel", "email": "gordon@mickel.tech", diff --git a/.flow/epics/fn-38-project-glossary-decision-records-and.json b/.flow/epics/fn-38-project-glossary-decision-records-and.json new file mode 100644 index 00000000..a421f6b1 --- /dev/null +++ b/.flow/epics/fn-38-project-glossary-decision-records-and.json @@ -0,0 +1,13 @@ +{ + "branch_name": "fn-38-project-glossary-decision-records-and", + "created_at": "2026-04-30T07:30:25.356337Z", + "depends_on_epics": [], + "id": "fn-38-project-glossary-decision-records-and", + "next_task": 1, + "plan_review_status": "unknown", + "plan_reviewed_at": null, + "spec_path": ".flow/specs/fn-38-project-glossary-decision-records-and.md", + "status": "open", + "title": "Project glossary, decision records, and doc-aware interview", + "updated_at": "2026-04-30T07:57:35.789561Z" +} diff --git a/.flow/memory/README.md b/.flow/memory/README.md new file mode 100644 index 00000000..ceb97c99 --- /dev/null +++ b/.flow/memory/README.md @@ -0,0 +1,3 @@ +# .flow/memory/ + +Categorized project memory. See flow-next docs for schema. 
diff --git a/.flow/memory/bug/build-errors/.gitkeep b/.flow/memory/bug/build-errors/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/bug/data/.gitkeep b/.flow/memory/bug/data/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/bug/integration/.gitkeep b/.flow/memory/bug/integration/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/bug/performance/.gitkeep b/.flow/memory/bug/performance/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/bug/runtime-errors/.gitkeep b/.flow/memory/bug/runtime-errors/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/bug/security/.gitkeep b/.flow/memory/bug/security/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/bug/test-failures/.gitkeep b/.flow/memory/bug/test-failures/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/bug/ui/.gitkeep b/.flow/memory/bug/ui/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/knowledge/architecture-patterns/.gitkeep b/.flow/memory/knowledge/architecture-patterns/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/knowledge/best-practices/.gitkeep b/.flow/memory/knowledge/best-practices/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/knowledge/conventions/.gitkeep b/.flow/memory/knowledge/conventions/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/knowledge/tooling-decisions/.gitkeep b/.flow/memory/knowledge/tooling-decisions/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/knowledge/workflow/.gitkeep b/.flow/memory/knowledge/workflow/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/.flow/memory/knowledge/workflow/audit-sync-codexsh-during-planning-for-2026-04-30.md 
b/.flow/memory/knowledge/workflow/audit-sync-codexsh-during-planning-for-2026-04-30.md new file mode 100644 index 00000000..c46d7849 --- /dev/null +++ b/.flow/memory/knowledge/workflow/audit-sync-codexsh-during-planning-for-2026-04-30.md @@ -0,0 +1,53 @@ +--- +title: Audit sync-codex.sh during planning for Codex mirror impact +date: "2026-04-30" +track: knowledge +category: workflow +module: planning +tags: [sync-codex, codex, planning, mirror, validation, subagents, tool-rewrites, openai-yaml] +applies_when: Audit sync-codex.sh during planning for Codex mirror impact +--- + +When planning a flow-next epic that touches skills, agents, slash commands, tool references, or skill prose, audit `scripts/sync-codex.sh` BEFORE writing tasks. The Codex mirror at `plugins/flow-next/codex/` is a derived artifact and the sync script is the single choke point — a missing sync rule silently degrades Codex parity (a real failure mode in 0.34.0-0.37.0, fixed in 0.37.1). + +## What to verify during planning + +**New user-facing skill** (any new `/flow-next:` slash command): +- Add the skill name to the `REQUIRED_OPENAI_YAML_SKILLS` array in `scripts/sync-codex.sh` (~line 537). Validation hard-fails if missing. +- Add a `generate_openai_yaml` call (~lines 514-531). Pick the right brand color section: workflow blue `#3B82F6`, review red `#EF4444`, utility amber `#F59E0B`. +- Provide a default prompt only when it materially helps the user (capture/work/plan/prospect have one; interview/audit/setup don't). + +**New tool reference** (a Claude-native tool we haven't used canonically before): +- Add a rewrite rule in the tool-name transformation block at lines 360-491 (currently handles `AskUserQuestion → request_user_input`). New tools need new sed transforms. +- Document the canonical → mirror mapping in CLAUDE.md "Cross-platform patterns" so future skill authors don't reinvent it. 
+ +**New `.md` agent in `plugins/flow-next/agents/`**: +- Verify the `.md → .toml` conversion logic picks it up (sync-codex.sh walks the agents directory). +- After running sync, confirm the corresponding `.toml` file appears in `plugins/flow-next/codex/agents/`. + +**New prose rule** (e.g. "jargon X must not appear in user-facing files"): +- Validation lives in **two places**, not one — mirror the existing `AskUserQuestion` / `ToolSearch` split: + - **Canonical scan**: `ci_test.sh` greps `plugins/flow-next/skills + agents + commands + scripts/flowctl.py` + - **Mirror scan**: `scripts/sync-codex.sh` validation block at lines 760-770 greps `plugins/flow-next/codex/skills/` and `codex/agents/` +- Mirror scan stays with the sync script because the mirror is its responsibility. + +**After any prose change in skills/agents**: +- Re-run `./scripts/sync-codex.sh`. Verify zero validation errors before committing. +- The script regenerates the entire `codex/` tree; commit it alongside the canonical change. + +## Why this matters + +Skipping this audit during planning produces silent Codex degradation that can go unnoticed for several releases. Concrete past failures: +- 0.34.0-0.37.0: 4 user-facing skills (resolve-pr, prospect, audit, memory-migrate) shipped without `openai.yaml` UI metadata because nobody added the `generate_openai_yaml` call. Fixed in 0.37.1. +- Same era: skills shipped with inline cross-platform tables (`AskUserQuestion / request_user_input / ask_user`) polluting agent context because the sync rewrite responsibility wasn't centralized in the script. Also fixed in 0.37.1. 
+ +## Pattern this applies to + +Any flow-next planning step that produces tasks touching: +- `plugins/flow-next/skills/**/*.md` +- `plugins/flow-next/agents/**/*.md` +- `plugins/flow-next/commands/**/*.md` +- `plugins/flow-next/scripts/flowctl.py` (when help text or argparse prose changes) +- `scripts/sync-codex.sh` itself (extending validation, rewrites, generation) + +If the planned change touches any of these, the planning step MUST include a sync-codex.sh audit task or fold the audit into an existing task's acceptance. diff --git a/.flow/specs/fn-38-project-glossary-decision-records-and.md b/.flow/specs/fn-38-project-glossary-decision-records-and.md new file mode 100644 index 00000000..aa61bfa2 --- /dev/null +++ b/.flow/specs/fn-38-project-glossary-decision-records-and.md @@ -0,0 +1,276 @@ +## Overview + +Three new artifacts (`GLOSSARY.md` at repo root, `knowledge/decisions/` memory category, doc-aware mode in `/flow-next:interview`) plus extensions to `docs-gap-scout`, `/flow-next:audit`, `/flow-next:sync`, and a terminology guard. Foundational schema + flowctl plumbing land first; interview integration depends on both; downstream extensions (scout / audit / sync) consume the new artifacts; docs and the R17 guard close the epic. + +## Conversation Evidence + +> user (turn 1, part 1): "Do you think this would be a good optional extension of our interview skill? Maybe with a flag or an option. Interview, you know, just interview, and then the user can either just write with docs or minus minus docs." +> user (turn 1, part 2): "We'd have to make it fit flow next. So none of his terminology improve on the skills, and then potentially our auditor skills or drift skills and doc scouts should take that into account if it exists as potential paths." +> user (turn 1, part 3): "We will not be mentioning the inspiration for this." 
+> user (turn 3): "we always need to follow the core tenet of 'the repo is state' and 'the repo is memory' -- the flow next memory is different" +> user (turn 4): "also tell me which DDD stuff you are removing, i think this is his ubiquitous language stuff" +> user (turn 6): "not sure about dropping the subdirectory GLOSSARY.md files, discuss" +> user (turn 7): "use /flow-next:flow-next:capture to capture this" + +## Goal & Context + + + +Projects accumulate vocabulary that isn't standard CS jargon — terms with project-specific canonical meaning, mode names whose word is overloaded, role names that look ordinary but carry precise semantics. Without a canonical reference, agents reading the codebase have to infer meaning from usage, and the same word ends up meaning different things in different turns of the same conversation. + +Separately, projects accumulate **decisions** — choices that are hard to reverse, surprising-without-context, and the result of real trade-offs. flow-next memory currently captures backward-looking learnings (`bug/*`, `knowledge/*`) but has no shape for forward-looking decisions with their rejected alternatives. Architecture-patterns is the closest neighbor but tracks "what we observed", not "what we picked and why". + +This epic introduces a project glossary at the repo root and a decision-records memory category, then layers four behaviors on top of the existing interview skill that activate when either artifact is present. The glossary lives at the repo root because project knowledge belongs to the project — every agent that walks `CLAUDE.md` / `AGENTS.md` should also read it. Decisions live under `.flow/memory/knowledge/decisions/` because they have a flow-next-shaped lifecycle (status, supersession, audit). + +## Architecture & Data Models + + + +Three artifacts: + +1. **`GLOSSARY.md` at repo root** — plain markdown, structured per-term sections. 
Term name (heading), one-line definition, optional `_Avoid_` aliases, optional relationships block. Subdirectory `GLOSSARY.md` files are supported via nearest-ancestor resolution: when working inside a subdirectory, the nearest-ancestor `GLOSSARY.md` applies; root applies otherwise. No meta-file equivalent — the filesystem already encodes structure. Format precedent: H2-per-term with paragraph definition (GitBook + glossarify-md convention). + +2. **`knowledge/decisions/` memory category** — extends the categorized memory schema with one new knowledge category. Reuses the existing frontmatter (`title`, `date`, `track`, `category`, `module`, `tags`), plus optional decision-specific fields (`decision_status`: proposed | accepted | superseded; `superseded_by`; `alternatives_considered`). Body is a 1-3 sentence floor; `Considered Options` and `Consequences` sections are optional. Indexed by `memory-scout`, walked by `/flow-next:audit`, listed by `flowctl memory list/search/read`. + +3. **Doc-aware mode in `/flow-next:interview`** — autodetects when `GLOSSARY.md` exists at the repo root OR `.flow/memory/knowledge/decisions/` has any entry. Off when neither is present. Forced on by `--docs` (creates `GLOSSARY.md` lazily on first resolved term); forced off by `--no-docs`. + +When doc-aware mode is on, four behaviors layer onto the existing interview workflow: + +- (a) **Phase-zero glossary scan** — read the nearest-ancestor `GLOSSARY.md`, find any term in the user's request that has a defined canonical entry; if the user's wording conflicts with the canonical term, surface the conflict as the first interview question. +- (b) **Fuzzy-term sharpening** — when the user uses overloaded language across the conversation, propose a canonical term, ask which is meant, append the resolved term to `GLOSSARY.md` via `flowctl glossary add`. 
+- (c) **Code/spec contradiction surfacing** — when grep reveals the code disagrees with a user assertion, surface the contradiction as a question rather than a silent codebase-resolved entry. +- (d) **Inline writes** — glossary append on every term resolution; decision entry written via `flowctl memory add --track knowledge --category decisions ...` only when the three-criteria gate passes. Decision writes follow the capture/audit pattern: agent shows draft via `AskUserQuestion` before writing. + +Throttle for (a): only flag glossary conflicts when the term is load-bearing for the current spec. A casual passing mention of a defined word does not trigger; specifying behavior that depends on a precise meaning does. + +## API Contracts + + + +flowctl glossary subcommands (atomic writes; no judgment): + +- `flowctl glossary add <term> --definition "..." [--avoid "a,b,c"] [--relates-to "..."]` — append or update a term entry. Writes to nearest-ancestor `GLOSSARY.md`; creates the file at repo root if no ancestor exists. Multi-line definitions accepted via `--definition-file -` (read from stdin) or `--definition-file <path>`. +- `flowctl glossary list [--json]` — emit all defined terms (term + definition + avoid aliases). When multiple `GLOSSARY.md` files exist, group by file. +- `flowctl glossary read <term>` — print the entry for a term. Resolution starts from cwd and walks ancestors. +- `flowctl glossary remove <term>` — delete the entry from the file that defines it. + +Decision entries reuse existing memory commands (`flowctl memory add --track knowledge --category decisions ...`); no new subcommands required for decision records. + +## Edge Cases & Constraints + + + +- **Term divergence across subdirectories** — a sub-glossary may legitimately define a term differently from root (e.g., admin role concept vs consumer role concept). Audit may surface as a "potential conflict" but does not forbid divergence. 
[inferred] +- **No DDD jargon** — skill prose, file format, tooling output, and user-facing documentation must not use the phrases "ubiquitous language", "bounded context", "domain expert", "aggregate root", or equivalent DDD terminology. [user] +- **Three-criteria decision gate** — before writing a decision entry, verify hard-to-reverse + surprising-without-context + result-of-real-trade-off. If any of the three fails, skip the decision write. The gate is enforced in interview skill prose, not in the schema (schema is permissive). [paraphrase] +- **Interview drag throttle** — glossary-conflict question on every defined word would exhaust users. Conflict surfaces only when the term is load-bearing for the current spec. [paraphrase] +- **No meta-file for subdirectory glossaries** — the filesystem already encodes subproject structure. Nearest-ancestor resolution is sufficient. [user] +- **Adoption gradient** — projects without `GLOSSARY.md` and without decision entries see no behavior change in interview. Autodetect keeps the cost-of-presence at zero for projects that don't need it. [paraphrase] +- **Survives flow-next uninstall** — `GLOSSARY.md` at repo root is project state, not flow-next bookkeeping. Removing `.flow/` deletes decisions but leaves the glossary intact. [paraphrase] +- **Nearest-ancestor walk is bounded** — walk stops at git repo root (`get_repo_root()`), filesystem boundary (`st_dev` change), or a 32-level defensive cap. Symlinks are not followed by walking logic; `pathlib.Path.parent` traversal does not recurse into them. [inferred] +- **Write-target follows read-target** — `flowctl glossary add` writes to whichever `GLOSSARY.md` the lookup would resolve. To force creation of a new subdirectory glossary, drop an empty `GLOSSARY.md` in the target subdir first, then run `add` from inside that subtree. 
[inferred] +- **Multi-line definitions** — `--definition` is single-line (shell quoting); multi-line definitions use `--definition-file -` (stdin) or `--definition-file <path>`. Behavior (b) inline writes use the stdin variant. [inferred] +- **Decision write confirmation** — decision entries follow the capture/audit pattern: agent shows the proposed entry via `AskUserQuestion` before writing. User can approve, edit, or skip. [inferred] + +## Acceptance Criteria + +- **R1:** `GLOSSARY.md` lives at the repo root; subdirectory `GLOSSARY.md` files are supported. Neither is placed under `.flow/`. [user] +- **R2:** A `decisions` knowledge category exists; new decision entries land at `.flow/memory/knowledge/decisions/<slug>-<date>.md` with the same frontmatter shape as other categorized entries. [paraphrase] +- **R3:** flowctl resolves glossary lookups via nearest-ancestor walk from the working directory. When run inside a subdirectory with its own `GLOSSARY.md`, that file applies; root applies otherwise. [user] +- **R4:** No meta-file (e.g. `GLOSSARY-MAP.md` or equivalent) is introduced for multi-glossary repos. Subdirectory glossaries are discovered via filesystem walk. [user] +- **R5:** `/flow-next:interview` autodetects doc-aware mode when `GLOSSARY.md` exists at the repo root OR `.flow/memory/knowledge/decisions/` has at least one entry. When neither is present, interview operates as today. [paraphrase] +- **R6:** `/flow-next:interview --docs` forces doc-aware mode on (lazily creating `GLOSSARY.md` at repo root on first term resolution); `--no-docs` forces it off. [paraphrase] +- **R7:** In doc-aware mode, when a user's request contains a term that conflicts with the nearest-ancestor glossary's canonical definition AND the term is load-bearing for the current spec, the interview surfaces the conflict as a question. 
[paraphrase] +- **R8:** In doc-aware mode, when fuzzy-term sharpening resolves an overloaded term, the resolution is written to the nearest-ancestor `GLOSSARY.md` via `flowctl glossary add` before the next question. [paraphrase] +- **R9:** In doc-aware mode, when grep reveals a code-versus-assertion contradiction, the contradiction is surfaced as a question (not silently resolved). [paraphrase] +- **R10:** In doc-aware mode, decision entries are written only when all three gate criteria hold: hard-to-reverse, surprising-without-context, result-of-real-trade-off. Writes follow the capture/audit pattern (agent shows draft via `AskUserQuestion` before writing). [paraphrase] +- **R11:** `docs-gap-scout` extends its scan to include `GLOSSARY.md` files (root + subdirectories) and `.flow/memory/knowledge/decisions/` entries. When a planned change touches a defined term or invalidates a decision constraint, the scout flags those targets. [user] +- **R12:** `/flow-next:audit` walks glossary terms (greps code for term + `_Avoid_` aliases; marks stale on absence; surfaces alias-creep) and decision entries (verifies the constraint still holds; prompts for supersession on conflict). [user] +- **R13:** `/flow-next:sync` (plan-sync) detects glossary-term renames and implicit decision overrides during drift detection, and updates downstream specs accordingly. [user] +- **R14:** flowctl ships glossary subcommands (`add`, `list`, `read`, `remove`) with atomic writes (write-then-rename) and schema validation. Multi-line input accepted via `--definition-file -` / `--definition-file <path>`. [paraphrase] +- **R15:** `GLOSSARY.md` is human-readable markdown with structured per-term sections (H2 heading per term, paragraph definition, optional `_Avoid_` line, optional relationships block); not YAML or any other format requiring tooling to parse. 
[paraphrase] +- **R16:** Decision entry bodies use a 1-3 sentence floor; `Considered Options` and `Consequences` sections are optional and only included when they add genuine value. [paraphrase] +- **R17:** No DDD terminology ("ubiquitous language", "bounded context", "domain expert", "aggregate root", or equivalent) appears in skill prose, file format documentation, flowctl help text, or user-facing output. An automated grep test enforces this in CI. [user] +- **R18:** Removing `.flow/` (e.g. via `rm -rf .flow/` or flow-next uninstall) deletes decision entries under `.flow/memory/knowledge/decisions/` but leaves `GLOSSARY.md` files (root + subdirectories) intact. The glossary is project state, not flow-next bookkeeping. [user] + +## Boundaries + + + +Out of scope: + +- A glossary-relationships meta-file. [user] +- DDD-style bounded-context modeling discipline forced on every project. [user] +- Multi-context relationship modeling (cross-context shared types). [inferred] +- Migration tooling for projects that already maintain a different glossary format. [inferred] +- A `--commit` flag on the glossary or decision-write subcommands; user owns staging. [inferred] +- Auto-generated glossary-from-code (term extraction via static analysis); glossary entries are user-curated. [inferred] +- Decision-supersession workflow (offering to supersede on `audit` is a follow-up, not part of this epic). [inferred] +- Subdir-scoped force flag (e.g. `--scope here`); to create a new subdirectory glossary, drop an empty `GLOSSARY.md` first. [inferred] +- Slug normalization for term names with special characters (`/`, `#`, spaces); v1 accepts plain ASCII heading text. [inferred] +- Validation that `superseded_by` points to an existing decision id; dangling refs surface during `/flow-next:audit`. 
[inferred] +- Concurrent-write coordination across parallel `flowctl glossary add` calls; atomic-write protects single calls but read-modify-write races may lose updates (Ralph + manual editing simultaneously). Document the limitation; defer locking. [inferred] +- File-watching / cache invalidation for long-lived agent loops; v1 re-reads on every command. [inferred] + +## Decision Context + + + +**Why root placement for the glossary, not under `.flow/`?** Project knowledge belongs to the project. Every agent that reads `CLAUDE.md` / `AGENTS.md` should also read `GLOSSARY.md`. Hiding it under `.flow/` makes it invisible to non-flow-next tooling. Root placement also survives flow-next uninstall. (User explicitly invoked the "repo is state" tenet.) + +**Why fold decisions into memory instead of a separate `.flow/decisions/` directory?** Decisions have a lifecycle (proposed → accepted → superseded). They get audited, searched, and surfaced by `memory-scout`. The categorized memory schema already captures every field a decision needs. A separate top-level concept would duplicate audit + search infrastructure for no gain. The user noted "the flow next memory is different" — different from the project glossary, but still the right home for decision records because decisions are flow-next-shaped state. + +**Why autodetect instead of a config flag?** Most projects don't need glossaries. Forcing config gymnastics on everyone is annoying. Autodetect keeps the surface invisible until the project commits to the concept (by writing the first term). + +**Why nearest-ancestor resolution for subdirectory glossaries?** Monorepos, plugin architectures, and library + example app repos legitimately have subprojects with distinct vocabularies. Forcing a single root glossary fights the filesystem signal. The resolution rule is simple enough to explain in one sentence and doesn't require a meta-file. 
Algorithm precedent: tsconfig.json (first-match-wins), bounded at git repo root (gitignore convention). + +**Why the three-criteria gate for decisions?** Hard-to-reverse + surprising-without-context + real-trade-off kills the bulk of would-be decision entries. Most "decisions" are easy to reverse, obvious in context, or had no real alternative. Without the gate, the decisions store fills with cruft within a quarter. + +**Why no DDD terminology?** flow-next is pragmatic and used across many project shapes (CLI tools, libraries, dev tools, plugins). DDD vocabulary excludes a large fraction of users and adds modeling discipline most projects don't need. The same artifact (a project glossary) can live without the DDD framing. + +**Closed-epic foundations** (no flowctl dep edges added; closed deps are no-ops, but listed for traceability): + +- **fn-30** (memory schema upgrade) — fn-38's `knowledge/decisions/` category extends the categorized YAML frontmatter schema; new optional fields (`decision_status`, `superseded_by`, `alternatives_considered`) layer onto existing frontmatter shape. +- **fn-34** (`/flow-next:audit` agent-native) — R12 extends the audit walk with glossary terms + decision entries; per-entry judge phase carries over directly. +- **fn-36** (interview grill-me enhancements) — R5–R10 layer onto fn-36's lead-with-recommendation, codebase-before-asking, and depth-cap-4 patterns. +- **fn-15-96t** (plan-sync agent) — R13 extends drift detection with two new signal types (glossary renames, decision overrides). + +## Approach + + + +**Sequencing.** Foundational tier ships first (T1 schema, T2 plumbing — parallel-startable). Interview integration (T3) and downstream extensions (T4-T6) consume both. Quality + docs close (T7-T8). Critical path: T1 + T2 → T3 → T7 + T8. 
+ +**Reuse points** (verified in repo-scout): + +- `MEMORY_CATEGORIES` constants block at `flowctl.py:3659-3676` — extension point for `decisions` category +- `MEMORY_REQUIRED_FIELDS` / `MEMORY_OPTIONAL_FIELDS` / `MEMORY_KNOWLEDGE_FIELDS` at `flowctl.py:3679-3698` — extension point for decision-specific fields +- `MEMORY_FIELD_ORDER` at `flowctl.py:3722-3741` — deterministic write order; new fields need explicit slots +- `atomic_write()` at `flowctl.py:798` — used directly for `GLOSSARY.md` writes (whole-file replace) +- `validate_memory_frontmatter()` at `flowctl.py:4571-4655` and `validate_prospect_frontmatter()` at `flowctl.py:4150-4182` — templates for `validate_glossary_entry` +- `cmd_memory_init()` at `flowctl.py:4965-5052` — pattern for lazy `decisions/` directory creation +- `cmd_prospect_*` at `flowctl.py:7534-7952` — cleanest recent pattern for new `cmd_glossary_*` subcommands +- `get_repo_root()` at `flowctl.py:87-99` — anchor for nearest-ancestor walk +- `flow-next-interview/SKILL.md:135-142` — Investigate-Codebase-Before-Asking pattern; doc-aware Phase-zero glossary scan layers on +- `flow-next-interview/questions.md:5-21` — Pre-Question Taxonomy; doc-aware mode adds glossary-lookup as a third axis +- `flow-next-audit/workflow.md:21-127` — Phase 0 walks `MEMORY_CATEGORIES`; decisions walk is automatic once schema extended +- `agents/docs-gap-scout.md:39, 59-68` — current scan list + change-type → doc-update mapping +- `scripts/sync-codex.sh:485-491` — `AskUserQuestion → request_user_input` rewrite (no new sync rule needed) + +**Format precedents** (from docs-scout): + +- H2-per-term + paragraph definition: GitBook + glossarify-md convention +- Y-statement (1-sentence ADR floor) for decision body shape +- tsconfig.json first-match-wins for nearest-ancestor walk +- gitignore ceiling-at-git-root for walk bounding + +**Net-new code** (no precedent in repo): + +- `find_nearest_glossary(start: Path) -> Optional[Path]` near `flowctl.py:87` — bounded ancestor 
walk +- Markdown section parser (regex `re.finditer` on H2 headings, with fenced-code stripping) +- `validate_glossary_entry` (term + definition + optional aliases shape) +- `cmd_glossary_add/list/read/remove` +- Argparse `glossary` subparser registration after `prospect_sub` block +- Doc-aware autodetect bash in `flow-next-interview/SKILL.md` +- `--docs` / `--no-docs` flag parsing in interview slash command +- R17 grep guard in `ci_test.sh` (or new `terminology_smoke_test.sh`) + +## Risks / Dependencies + + + +**Risks:** + +- **Order-of-deps T2 → T3.** Minimum viable T2 to unblock T3 is `glossary add` + `glossary read` (with nearest-ancestor walk). `list` and `remove` can ship later without blocking interview work — but the task is small enough to keep in one piece. +- **Performance of nearest-ancestor walk in agent loops** — unlikely to matter (small N, infrequent reads, depth-capped). Defer caching until profiled. +- **R17 enforcement regression** — manual review on first ship; automated grep guard in T7 prevents regression on subsequent edits. +- **Audit grep scope (R12)** — "marks stale on absence" needs a concrete rule for what counts as "absent". Default: grep tracked code files (excluding `.flow/`, `node_modules`, etc.). T5 settles this. +- **Decision write read-back UX drag** — every gate-pass triggers an AskUserQuestion. If the gate fires often, interviews get long. Mitigation: gate is strict (three criteria); most decisions don't trigger. + +**Dependencies:** + +- No flowctl dependency edges to other open epics (per epic-scout). fn-38 is unblocked. +- Closed-epic foundations: fn-30 (memory schema), fn-34 (audit), fn-36 (interview), fn-15-96t (plan-sync) — listed in Decision Context above. +- Tooling deps: `git rev-parse` for repo-root resolution (already in flowctl); stdlib `re` for markdown parsing (no new deps). 
+ +## Test notes + + + +- **Test framework**: bash smoke tests (`plugins/flow-next/scripts/*_smoke_test.sh`), each refusing to run from main repo. Pattern: pure bash + inline python heredocs. Reference: `audit_smoke_test.sh:1-90`. +- **New smoke**: `plugins/flow-next/scripts/glossary_smoke_test.sh` covers: nearest-ancestor walk (root + subdir), atomic writes, multi-line definition via stdin, parse roundtrip, `_Avoid_` aliases, term removal. +- **Extended smoke**: `ci_test.sh` memory section (`ci_test.sh:170-180`) gains a `decisions` category assertion; new R17 grep guard scans skill prose + flowctl.py for forbidden DDD vocabulary. +- **No new pytest**: flowctl tests are bash + python heredocs; the project does not use pytest. + +## References + +- **flow-next captured spec**: `.flow/specs/fn-38-project-glossary-decision-records-and.md` (this file) +- **Memory schema constants**: `plugins/flow-next/scripts/flowctl.py:3659-3744` +- **Atomic write helper**: `plugins/flow-next/scripts/flowctl.py:798-809` +- **Memory subcommand patterns**: `plugins/flow-next/scripts/flowctl.py:5107-5660` (memory_add/read/list) +- **Prospect subcommand patterns** (cleaner recent reference): `plugins/flow-next/scripts/flowctl.py:7534-7952` +- **Interview skill (doc-aware extension point)**: `plugins/flow-next/skills/flow-next-interview/SKILL.md` + `questions.md` +- **Audit skill (R12 extension point)**: `plugins/flow-next/skills/flow-next-audit/workflow.md:21-127` +- **Plan-sync agent (R13 extension point)**: `plugins/flow-next/agents/plan-sync.md:85-103` +- **docs-gap-scout (R11 extension point)**: `plugins/flow-next/agents/docs-gap-scout.md:39, 59-68` +- **sync-codex.sh (validation gate)**: `scripts/sync-codex.sh:485-491, 760-770` +- **External format precedents**: + - [npryce/adr-tools](https://github.com/npryce/adr-tools) — original Nygard ADR template + - [Y-statements (Olaf Zimmermann)](https://medium.com/olzzio/y-statements-10eb07b5a177) — 1-sentence ADR shape + - [open-gitops 
GLOSSARY.md](https://github.com/open-gitops/documents/blob/main/GLOSSARY.md) — H2-per-term real-world model + - [glossarify-md](https://github.com/about-code/glossarify-md) — H2-per-term tooling expectation + - [EditorConfig spec](https://spec.editorconfig.org/) — ancestor walk semantics + - [TypeScript handbook (tsconfig.json)](https://www.typescriptlang.org/docs/handbook/tsconfig-json.html) — first-match-wins resolution + +## Quick commands + +```bash +# Glossary smoke test (T2 ships this) +plugins/flow-next/scripts/glossary_smoke_test.sh + +# Decisions category — schema validation (T1 ships this) +.flow/bin/flowctl memory init +.flow/bin/flowctl memory add --track knowledge --category decisions \ + --title "Use nearest-ancestor for glossary lookup" \ + --body "Hard to reverse: clients depend on resolution behavior. Surprising: not the obvious 'always-root' default. Trade-off: subdir flexibility vs single-source-of-truth simplicity." + +# Doc-aware autodetect (T3 ships this) +# T2 leaves an `# Glossary` H1 husk on disk after the last term is removed (R18 — never deletes +# the file), so a plain `[[ -f GLOSSARY.md ]]` would falsely activate doc-aware mode on a husk. +# Use the JSON shape from T2: `total_terms > 0` ignores empty husks deterministically. +.flow/bin/flowctl glossary list --json | jq '.total_terms > 0' +.flow/bin/flowctl memory list --track knowledge --category decisions --json | jq '.entries | length' + +# R17 terminology guard (T7 ships this) +grep -RnE 'ubiquitous language|bounded context|domain expert|aggregate root' \ + plugins/flow-next/skills plugins/flow-next/scripts/flowctl.py \ + plugins/flow-next/agents plugins/flow-next/commands && \ + echo "FAIL: DDD jargon detected" || echo "PASS" +``` + +## Early proof point + +Task `fn-38-...2` (flowctl glossary plumbing) validates the fundamental approach: `GLOSSARY.md` format + nearest-ancestor walk + atomic writes round-trip cleanly. 
If parser fails on multi-line definitions, or nearest-ancestor walk has subtle bugs (cycle handling, ceiling detection), or the file format proves unreadable in practice, the entire downstream chain (T3 interview integration, T4-T6 scout/audit/sync extensions) needs revision before continuing. Re-evaluate format choice (H2-per-term vs alternatives) and walk algorithm before T3+ proceeds. + +## Requirement coverage + +| Req | Description | Task(s) | Gap justification | +|-----|-------------|---------|-------------------| +| R1 | GLOSSARY.md at repo root + subdirs supported | fn-38-...2 | — | +| R2 | `decisions` knowledge category exists | fn-38-...1 | — | +| R3 | Nearest-ancestor walk for lookups | fn-38-...2 | — | +| R4 | No meta-file introduced | fn-38-...2, fn-38-...7 | T2 doesn't add one; T7 grep-verifies | +| R5 | Interview autodetects doc-aware mode | fn-38-...3 | — | +| R6 | `--docs` / `--no-docs` flags | fn-38-...3 | — | +| R7 | Term-conflict surfaced when load-bearing | fn-38-...3 | — | +| R8 | Inline glossary write on resolution | fn-38-...3 | — | +| R9 | Code/spec contradiction surfaced | fn-38-...3 | — | +| R10 | Three-criteria gate + read-back for decision write | fn-38-...3 | — | +| R11 | docs-gap-scout extends scan | fn-38-...4 | — | +| R12 | /flow-next:audit walks glossary + decisions | fn-38-...5 | — | +| R13 | /flow-next:sync detects glossary/decision drift | fn-38-...6 | — | +| R14 | flowctl glossary subcommands + multi-line | fn-38-...2 | — | +| R15 | GLOSSARY.md is human-readable markdown | fn-38-...2 | — | +| R16 | Decision body 1-3 sentence floor | fn-38-...1 | Schema permissive; format documented in T1 | +| R17 | No DDD terminology + automated grep | fn-38-...3, fn-38-...7 | T3 enforces in prose; T7 ships the grep | +| R18 | Glossary survives flow-next uninstall | fn-38-...2 | Root placement satisfies; T2 verifies | diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.1.json 
b/.flow/tasks/fn-38-project-glossary-decision-records-and.1.json new file mode 100644 index 00000000..8fce439d --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.1.json @@ -0,0 +1,14 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-04-30T07:57:53.686885Z", + "depends_on": [], + "epic": "fn-38-project-glossary-decision-records-and", + "id": "fn-38-project-glossary-decision-records-and.1", + "priority": null, + "spec_path": ".flow/tasks/fn-38-project-glossary-decision-records-and.1.md", + "status": "todo", + "title": "Decisions memory category schema", + "updated_at": "2026-04-30T08:04:43.004820Z" +} diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.1.md b/.flow/tasks/fn-38-project-glossary-decision-records-and.1.md new file mode 100644 index 00000000..02697e91 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.1.md @@ -0,0 +1,47 @@ +--- +satisfies: [R2, R16] +--- + +## Description + +Extend the categorized memory schema with a `decisions` knowledge category and decision-specific optional frontmatter fields. Pure schema/plumbing change; no skill behavior — interview's three-criteria gate (T3) and audit's per-entry judge (T5) consume this category once it lands. + +**Size:** S +**Files:** `plugins/flow-next/scripts/flowctl.py`, `plugins/flow-next/scripts/ci_test.sh` + +## Approach + +- Add `"decisions"` to `MEMORY_CATEGORIES["knowledge"]` list at `flowctl.py:3670-3676`. +- Extend `MEMORY_KNOWLEDGE_FIELDS` (or add a parallel `MEMORY_DECISION_FIELDS`) at `flowctl.py:3679-3698` with optional fields: `decision_status` (enum: `proposed | accepted | superseded`), `superseded_by`, `alternatives_considered` (list). +- Update `MEMORY_FIELD_ORDER` at `flowctl.py:3722-3741` with explicit slots for new fields (preserves deterministic write order — gotcha flagged by repo-scout). 
+- Update `cmd_memory_init()` at `flowctl.py:4965-5052` so `flowctl memory init` lazy-creates `.flow/memory/knowledge/decisions/.gitkeep` (extend the directory loop ~line 5011). +- Validator (`validate_memory_frontmatter` at `flowctl.py:4571-4655`) picks up new optional fields automatically via the allowed-fields union. Add an enum-check for `decision_status` per the `MEMORY_STATUS` precedent at `flowctl.py:3718`. +- `ci_test.sh:170-180` Memory System block: add an assertion that creates a decisions-track entry with all three optional fields, reads it back via `flowctl memory list --track knowledge --category decisions --json`, and verifies field round-trip. + +## Investigation targets + +**Required:** +- `plugins/flow-next/scripts/flowctl.py:3659-3744` — full memory-schema constants block +- `plugins/flow-next/scripts/flowctl.py:4965-5052` — `cmd_memory_init` lazy-create pattern (extend the directory loop) +- `plugins/flow-next/scripts/flowctl.py:4571-4655` — `validate_memory_frontmatter` +- `plugins/flow-next/scripts/ci_test.sh:170-180` — Memory System test section + +**Optional:** +- `plugins/flow-next/scripts/flowctl.py:3722-3741` — `MEMORY_FIELD_ORDER` (deterministic write-order tuple) + +## Acceptance + +- [ ] `flowctl memory init` creates `.flow/memory/knowledge/decisions/.gitkeep` +- [ ] `flowctl memory add --track knowledge --category decisions --title "..." 
--body "..."` succeeds and writes to `.flow/memory/knowledge/decisions/<slug>-<date>.md` +- [ ] Optional fields `decision_status`, `superseded_by`, `alternatives_considered` round-trip through frontmatter (write → read → match) +- [ ] `decision_status` rejects values outside the `proposed | accepted | superseded` enum +- [ ] `flowctl memory list --track knowledge --category decisions` returns the entry +- [ ] `MEMORY_FIELD_ORDER` includes explicit slots for new fields (deterministic write order verified by repeated read+write cycle) +- [ ] `ci_test.sh` decisions-track assertion passes + +## Done summary +Extended categorized memory schema with knowledge/decisions category + decision-specific optional fields (decision_status enum, superseded_by, alternatives_considered); flowctl memory init lazy-creates the directory via the existing loop, MEMORY_FIELD_ORDER carries explicit slots for deterministic write order, and validate_memory_frontmatter enum-checks decision_status. ci_test.sh gains a decisions-track section covering round-trip, negative enum, deterministic order, and lazy-dir-create; smoke_test.sh placeholder count updated 5 to 6 + explicit decisions/.gitkeep check.
+## Evidence +- Commits: 1cdf606f1a1ccf57d6187f7573de5adca7a97717 +- Tests: bash plugins/flow-next/scripts/ci_test.sh (54/54 pass), bash plugins/flow-next/scripts/smoke_test.sh (130/130 pass) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.2.json b/.flow/tasks/fn-38-project-glossary-decision-records-and.2.json new file mode 100644 index 00000000..54f97859 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.2.json @@ -0,0 +1,14 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-04-30T07:57:53.840002Z", + "depends_on": [], + "epic": "fn-38-project-glossary-decision-records-and", + "id": "fn-38-project-glossary-decision-records-and.2", + "priority": null, + "spec_path": ".flow/tasks/fn-38-project-glossary-decision-records-and.2.md", + "status": "todo", + "title": "flowctl glossary subcommands + nearest-ancestor walk", + "updated_at": "2026-04-30T08:04:43.159079Z" +} diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.2.md b/.flow/tasks/fn-38-project-glossary-decision-records-and.2.md new file mode 100644 index 00000000..8608be4a --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.2.md @@ -0,0 +1,56 @@ +--- +satisfies: [R1, R3, R4, R14, R15, R18] +--- + +## Description + +Implement the glossary plumbing: `GLOSSARY.md` file format, nearest-ancestor resolution, and four flowctl subcommands. **This is the early proof point for the epic** — validates file format + walk algorithm + atomic writes round-trip cleanly. If this fails, downstream tasks (T3-T6) need revision before continuing. + +**Size:** M +**Files:** `plugins/flow-next/scripts/flowctl.py`, `plugins/flow-next/scripts/glossary_smoke_test.sh` + +## Approach + +- New helper `find_nearest_glossary(start: Path = Path.cwd()) -> Optional[Path]` near `flowctl.py:87`. 
**First-match-wins ancestor walk** (tsconfig pattern), bounded at `get_repo_root()` (gitignore convention), defensive 32-level cap (defends against pathological symlinks per practice-scout). Walk via `Path.parent`; do NOT manually follow symlinks (kernel handles `ELOOP`). +- New `parse_glossary_file(text: str) -> list[GlossaryEntry]`. Use stdlib `re.finditer` on `^##\s+(.+)$` (multiline). Pre-strip fenced code blocks via `re.sub(r'\`\`\`.*?\`\`\`', '', text, flags=re.DOTALL)`. Pre-normalize CRLF. Capture per term: heading text, definition paragraph (heading-end → next-heading-start), `_Avoid_:` italic line (regex `^_Avoid_:\s*(.+)$`), optional `_Relates to_:` anchor links. +- New `validate_glossary_entry(entry)` — schema check before write. Pattern from `validate_prospect_frontmatter` at `flowctl.py:4150-4182`. Required: term name + definition. Optional: avoid (list), relates_to (list). +- New `cmd_glossary_add/list/read/remove`. Pattern from `cmd_prospect_*` at `flowctl.py:7534-7952` (cleanest recent reference). **Atomic-rewrite**: read full file, mutate in-memory entry list, render, `atomic_write` whole file (`flowctl.py:798`). Whole-file replace is correct because glossary writes are coarse-grained. +- **Multi-line definitions:** `--definition "..."` stays single-line. `--definition-file -` reads stdin; `--definition-file <path>` reads the definition from that file. Behavior (b) inline writes (T3) use the stdin variant. +- **Write-target rule:** `add` writes to nearest-ancestor (matches read resolution). To force creation of a subdirectory glossary, drop an empty `GLOSSARY.md` first; subsequent `add` from inside that subtree writes to it. No `--scope` flag. +- Argparse `glossary` subparser registration after the `prospect_sub` block at `flowctl.py:15860-15928`. +- New smoke `plugins/flow-next/scripts/glossary_smoke_test.sh` (pattern from `audit_smoke_test.sh:1-90`): refuses to run from main repo; uses `/tmp/glossary-smoke-$$`.
Cases: nearest-ancestor walk (root + subdir), atomic write, multi-line via stdin, parse roundtrip, `_Avoid_` aliases, term removal, last-term-removal hygiene (removing the last term leaves an empty `# Glossary` H1 husk; per Constraints the husk is kept, not deleted — this is safe because T3's autodetect requires a non-empty term list, so a husk alone does not switch doc-aware mode on). +- **R18 verified by smoke:** `rm -rf .flow/` between two glossary-write phases; verify `GLOSSARY.md` files survive. + +## Investigation targets + +**Required:** +- `plugins/flow-next/scripts/flowctl.py:87-99` — `get_repo_root` (anchor for ancestor walk) +- `plugins/flow-next/scripts/flowctl.py:798-809` — `atomic_write` +- `plugins/flow-next/scripts/flowctl.py:866-895` — `slugify` (term-name normalization for case-insensitive match) +- `plugins/flow-next/scripts/flowctl.py:4150-4182` — `validate_prospect_frontmatter` (schema-validation template) +- `plugins/flow-next/scripts/flowctl.py:7534-7952` — `cmd_prospect_*` (subcommand pattern) +- `plugins/flow-next/scripts/flowctl.py:15860-15928` — `prospect_sub` argparse registration template +- `plugins/flow-next/scripts/audit_smoke_test.sh:1-90` — smoke test structure + +**Optional:** +- [open-gitops/documents/GLOSSARY.md](https://github.com/open-gitops/documents/blob/main/GLOSSARY.md) — real-world H2-per-term reference + +## Acceptance + +- [ ] `flowctl glossary add Term --definition "single-line def" --avoid "alias1,alias2"` writes to root `GLOSSARY.md` (creates if missing); single-line definition round-trips +- [ ] `flowctl glossary add Term --definition-file -` (stdin) accepts multi-line definition; round-trips with newlines preserved +- [ ] `flowctl glossary read Term` resolves via nearest-ancestor walk from cwd; subdir glossary wins when present, root wins otherwise (R3) +- [ ] `flowctl glossary list --json` returns terms grouped by file when multiple `GLOSSARY.md` files exist on the chain +- [ ] `flowctl glossary remove Term` deletes the entry from the file that defines it +- [ ]
Nearest-ancestor walk stops at `get_repo_root()`, filesystem boundary (`st_dev` change), or 32-level cap; symlink loops do not hang (kernel `ELOOP` covered) +- [ ] `GLOSSARY.md` is human-readable markdown with H2-per-term sections (R15) +- [ ] No meta-file (e.g. `GLOSSARY-MAP.md`) is added in this task or anywhere in the codebase (R4 — verified by T7 grep guard) +- [ ] `rm -rf .flow/` does not affect any `GLOSSARY.md` file (R18 verified by smoke) +- [ ] `glossary_smoke_test.sh` passes (covers all the above + parse roundtrip + fenced-code stripping + last-term-removal hygiene) + +## Done summary +Implemented `flowctl glossary {add,list,read,remove}` with `find_nearest_glossary` (cwd → git root, bounded by filesystem boundary + 32-level cap), byte-aligned fenced-code stripping in the parser (so headings inside ``` blocks aren't picked up), atomic whole-file rewrite, and a 25-case / 80-assertion `glossary_smoke_test.sh` covering nearest-ancestor walk, atomic-write crash simulation, multi-line stdin round-trip, parse roundtrip, _Avoid_ + _Relates to_ preservation, husk hygiene on last-term removal, and R18 (.flow/ removal preserves GLOSSARY.md). Early proof point landed cleanly — file format + walk algorithm + atomic writes round-trip; downstream T3-T6 unblocked. 
+## Evidence +- Commits: 9921ace0b82d24290b55ec40dbe44b0485b80bcc +- Tests: bash plugins/flow-next/scripts/glossary_smoke_test.sh (80/80 pass), bash plugins/flow-next/scripts/smoke_test.sh (130/130 pass), bash plugins/flow-next/scripts/audit_smoke_test.sh (41/41 pass), bash plugins/flow-next/scripts/prospect_smoke_test.sh (94/94 pass), bash plugins/flow-next/scripts/ci_test.sh (54/54 pass) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.3.json b/.flow/tasks/fn-38-project-glossary-decision-records-and.3.json new file mode 100644 index 00000000..bd2685a9 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.3.json @@ -0,0 +1,17 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-04-30T07:57:53.990685Z", + "depends_on": [ + "fn-38-project-glossary-decision-records-and.1", + "fn-38-project-glossary-decision-records-and.2" + ], + "epic": "fn-38-project-glossary-decision-records-and", + "id": "fn-38-project-glossary-decision-records-and.3", + "priority": null, + "spec_path": ".flow/tasks/fn-38-project-glossary-decision-records-and.3.md", + "status": "todo", + "title": "/flow-next:interview doc-aware mode", + "updated_at": "2026-04-30T08:04:43.313764Z" +} diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.3.md b/.flow/tasks/fn-38-project-glossary-decision-records-and.3.md new file mode 100644 index 00000000..fd4b6230 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.3.md @@ -0,0 +1,58 @@ +--- +satisfies: [R5, R6, R7, R8, R9, R10] +--- + +## Description + +Add doc-aware mode to `/flow-next:interview`: autodetect on `GLOSSARY.md` or `knowledge/decisions/` presence, `--docs` / `--no-docs` flags, four layered behaviors (glossary scan, fuzzy-term sharpening, code/spec contradiction surfacing, inline writes with three-criteria gate + read-back). Pure prose changes (no flowctl plumbing in this task). 
+ +**Size:** M +**Files:** `plugins/flow-next/skills/flow-next-interview/SKILL.md`, `plugins/flow-next/skills/flow-next-interview/questions.md`, `plugins/flow-next/commands/flow-next/interview.md`, regenerate Codex mirror via `scripts/sync-codex.sh` + +## Approach + +- **Autodetect bash** in `SKILL.md` Setup section (after line ~58): treat `GLOSSARY.md` as auto-aware ONLY when it has at least one defined term. Use `flowctl glossary list --json | jq '.total_terms > 0'` (NOT `[[ -f GLOSSARY.md ]]`) — T2 leaves a `# Glossary` husk on disk after last-term-removal, and an empty husk must NOT trip autodetect. Decisions track: `flowctl memory list --track knowledge --category decisions --json | jq '.entries | length > 0'`. Set `DOC_AWARE=1` if either fires. +- **Flag parsing** (pattern from `audit/SKILL.md:30-40` `mode:autofix` token): `--docs` forces `DOC_AWARE=1` (and lazy-creates root `GLOSSARY.md` on first term resolution via `flowctl glossary add`, which writes to nearest-ancestor or repo root when none exists); `--no-docs` forces `DOC_AWARE=0`. +- **Behavior (a) — Phase-zero glossary scan**: when `DOC_AWARE=1`, before drafting the first question batch, run `flowctl glossary list --json` and intersect terms with the user's request. JSON shape: `{groups: [{path, entries: [{term, definition, avoid, relates_to}], count}], file_count, total_terms}`. For each defined term in the request, evaluate: is the term load-bearing for the spec's behavior? If yes AND user wording conflicts with canonical (term match is case-insensitive whitespace-collapsed per T2's `_glossary_term_matches`; alias hits via `entries[].avoid`), surface as the first interview question via `AskUserQuestion`. **Throttle** (R7 + Constraints): casual passing mention → no question; behavior-defining mention → question. +- **Behavior (b) — Fuzzy-term sharpening**: when overloaded language emerges across the conversation, propose canonical via `AskUserQuestion` (lead-with-rec + confidence tier). 
On user-pick, build the resolved definition and call `flowctl glossary add --definition-file -` (pipe stdin) before next question. `add` is upsert: case-insensitive match replaces the existing entry in full; new terms append at the end. The next question can re-read glossary; cache freshness handled by re-read on every glossary-aware turn (no in-memory cache). +- **Behavior (c) — Code/spec contradiction**: extend `## Investigate Codebase Before Asking` (SKILL.md:135-142). When grep reveals code disagrees with a user assertion, escalate from silent `## Resolved via Codebase` log to an `AskUserQuestion`. Body: "Code shows X (file:line); you said Y. Which?" Confidence: `[high]` when grep evidence is unambiguous. +- **Behavior (d) — Decision write**: when interview surfaces an architectural decision, evaluate three-criteria gate (hard-to-reverse + surprising-without-context + real-trade-off). If all three hold, draft entry (title + 1-3 sentence body + optional `Considered Options` + optional `Consequences`) and show via `AskUserQuestion` (capture/audit pattern). On `approve`, call `flowctl memory add --track knowledge --category decisions ...`. Never write silently. +- **questions.md (Pre-Question Taxonomy at lines 5-21)**: add glossary-lookup as a third axis. Codebase-answerable + glossary-lookup-answerable → resolved silently in `## Resolved via Codebase` (or `## Glossary Conflicts` for behavior-a hits). User-judgment-required → `AskUserQuestion`. +- **interview.md (slash command)**: document `--docs` / `--no-docs` flags in the args section. +- **R17 enforcement**: skill prose must NOT use "ubiquitous language", "bounded context", "domain expert", "aggregate root", or equivalent DDD vocabulary. T7 ships the automated grep guard; manual review on this task's first ship. 
+- **Run `scripts/sync-codex.sh`** after prose changes; validation block (`scripts/sync-codex.sh:760-770`) must pass: no `AskUserQuestion` literals leak into Codex mirror, no DDD jargon, all required `openai.yaml` files present. + +## Investigation targets + +**Required:** +- `plugins/flow-next/skills/flow-next-interview/SKILL.md:55-75` — Setup + Detect Input Type (autodetect insertion point) +- `plugins/flow-next/skills/flow-next-interview/SKILL.md:135-142` — Investigate Codebase Before Asking (extension point for behavior c) +- `plugins/flow-next/skills/flow-next-interview/SKILL.md:178-191` — `## Resolved via Codebase` convention +- `plugins/flow-next/skills/flow-next-interview/questions.md:5-21` — Pre-Question Taxonomy +- `plugins/flow-next/skills/flow-next-audit/SKILL.md:30-40` — `mode:autofix` token parsing template +- `plugins/flow-next/commands/flow-next/interview.md` — slash command entry +- `scripts/sync-codex.sh:485-491, 760-770` — rewrite + validation rules + +**Optional:** +- `plugins/flow-next/skills/flow-next-capture/workflow.md` — read-back pattern reference for decision write + +## Acceptance + +- [ ] Interview autodetects doc-aware mode when `GLOSSARY.md` has ≥1 defined term (`total_terms > 0`) OR `.flow/memory/knowledge/decisions/` has any entry; off when neither — empty husk (post-last-term-removal) does NOT trip autodetect (R5) +- [ ] Term matching uses the same case-insensitive whitespace-collapsed rule as `flowctl glossary read` (`_glossary_term_matches` in flowctl.py) — do NOT reinvent +- [ ] `--docs` flag forces doc-aware on (lazy-creates root `GLOSSARY.md` on first term resolution); `--no-docs` forces off (R6) +- [ ] When user wording conflicts with canonical glossary term AND term is load-bearing for current spec, conflict surfaces as `AskUserQuestion` (R7); passing mention does NOT trigger +- [ ] When fuzzy term resolves, definition is written to nearest-ancestor `GLOSSARY.md` via `flowctl glossary add --definition-file -` before next 
question (R8) +- [ ] When grep reveals code-vs-assertion contradiction, surfaced as `AskUserQuestion` (not silently resolved) (R9) +- [ ] Decision entries write only when three-criteria gate passes; agent shows draft via `AskUserQuestion` before write (R10) +- [ ] questions.md Pre-Question Taxonomy gains glossary-lookup as a third axis +- [ ] interview.md documents `--docs` / `--no-docs` flags +- [ ] `scripts/sync-codex.sh` runs clean (no `AskUserQuestion` / `Task` literals in Codex mirror, no DDD jargon, no missing `openai.yaml`) +- [ ] Manual smoke: invoke `/flow-next:interview` in a project with sample `GLOSSARY.md` containing a known canonical term + conflicting user wording; verify behavior (a) question fires + +## Done summary +Added doc-aware mode to /flow-next:interview: husk-aware autodetect (glossary `total_terms > 0` OR any decision entry), `--docs` / `--no-docs` flags, four layered behaviors (phase-zero glossary scan with load-bearing throttle, fuzzy-term sharpening with stdin upsert, code-vs-assertion contradiction surfacing, three-criteria gated decision-record writes with read-back). Pure prose changes; no flowctl plumbing. Codex mirror regenerated cleanly (no AskUserQuestion leak, no DDD jargon, all required openai.yaml present). 
+## Evidence +- Commits: 8e12e406d26aabc0e5f985abbd01e95bb9deac3e +- Tests: ./scripts/sync-codex.sh (green: 21 skills/agents, all 14 required openai.yaml, no AskUserQuestion leak in Codex mirror), manual: empty repo autodetect=0; one term added autodetect=1; husk-after-remove autodetect=0 (R5 critical case); decisions-only autodetect=1; glossary read case-insensitive+whitespace-collapsed; --definition-file - multi-line round-trip; flag parse 5 cases, grep R17: no DDD jargon in canonical, codex/, agents/, commands/, flowctl.py +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.4.json b/.flow/tasks/fn-38-project-glossary-decision-records-and.4.json new file mode 100644 index 00000000..16d45017 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.4.json @@ -0,0 +1,17 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-04-30T07:57:54.144809Z", + "depends_on": [ + "fn-38-project-glossary-decision-records-and.1", + "fn-38-project-glossary-decision-records-and.2" + ], + "epic": "fn-38-project-glossary-decision-records-and", + "id": "fn-38-project-glossary-decision-records-and.4", + "priority": null, + "spec_path": ".flow/tasks/fn-38-project-glossary-decision-records-and.4.md", + "status": "todo", + "title": "docs-gap-scout extends scan to GLOSSARY.md + decisions/", + "updated_at": "2026-04-30T08:04:43.468189Z" +} diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.4.md b/.flow/tasks/fn-38-project-glossary-decision-records-and.4.md new file mode 100644 index 00000000..91b00c12 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.4.md @@ -0,0 +1,44 @@ +--- +satisfies: [R11] +--- + +## Description + +Extend the `docs-gap-scout` agent to scan `GLOSSARY.md` (root + subdirectories) and `.flow/memory/knowledge/decisions/` entries during planning. 
When a planned change touches a defined term or invalidates a decision constraint, the scout flags those targets in its output. + +**Size:** S +**Files:** `plugins/flow-next/agents/docs-gap-scout.md`, regenerate Codex mirror via `scripts/sync-codex.sh` + +## Approach + +- Update the doc-location scan list at `docs-gap-scout.md:39` to include: + - `GLOSSARY.md` (root) — direct check; prefer `flowctl glossary list --json` (JSON shape per fn-38.2: `{groups: [{path, entries, count}], file_count, total_terms}`) since it walks ancestors and respects gitignore conventions; raw `find` only as fallback. Empty husks (`count: 0` after last-term-removal — fn-38.2 keeps the file per R18) carry no terms — skip them, don't flag as drift signal. + - Subdirectory `GLOSSARY.md` — covered by `glossary list --json` (walks ancestors); raw fallback `find . -name GLOSSARY.md -not -path './node_modules/*' -not -path './.git/*'` + - `.flow/memory/knowledge/decisions/` — direct directory check +- Extend the change-type → doc-update mapping (`docs-gap-scout.md:59-68`) with two new rows: + - **"Glossary term touched"** — when the planned-change diff modifies code that uses a term defined in any `GLOSSARY.md`, flag the glossary entry (file + term name) for review + - **"Decision constraint"** — when the planned-change touches a file referenced in a decision entry's `Consequences` section, flag the decision entry (id + title) for review +- Run `scripts/sync-codex.sh` to regenerate `plugins/flow-next/codex/agents/docs-gap-scout.toml`. Verify the Codex mirror picks up the changes (no `AskUserQuestion` / DDD-jargon validator failures). +- **R17 compliance**: do NOT use DDD terminology in agent prose. 
+ +## Investigation targets + +**Required:** +- `plugins/flow-next/agents/docs-gap-scout.md:39` — current scan list +- `plugins/flow-next/agents/docs-gap-scout.md:59-68` — change-type → doc-update mapping table +- `plugins/flow-next/codex/agents/docs-gap-scout.toml` — Codex mirror (auto-regen) + +## Acceptance + +- [ ] `docs-gap-scout` scan list includes `GLOSSARY.md` (root + subdirs via `flowctl glossary list --json`, raw `find` as fallback) and `.flow/memory/knowledge/decisions/` +- [ ] Mapping table has rows for "Glossary term touched" and "Decision constraint" change-types +- [ ] `scripts/sync-codex.sh` regenerates `codex/agents/docs-gap-scout.toml` cleanly +- [ ] No DDD jargon in agent prose (R17 — manually verify; T7 grep guard catches regressions) +- [ ] Manual smoke: invoke `docs-gap-scout` on a planned change touching a glossary-defined term; verify glossary entry surfaces in scout output + +## Done summary +Extended `docs-gap-scout` to scan `GLOSSARY.md` (root + subdirs via `flowctl glossary list --json` with raw `find` fallback, skipping husks where `count: 0`) and `.flow/memory/knowledge/decisions/`, plus added "Glossary term touched" and "Decision constraint" rows to the change-type → doc-update mapping. Codex mirror regenerated via `sync-codex.sh` (clean), R17 grep clean (no DDD jargon).
+## Evidence +- Commits: d1823b60f7f4e1775ad47562328b2037891c5221 +- Tests: scripts/sync-codex.sh (21 skills, 21 agents, hooks.json — clean), plugins/flow-next/scripts/smoke_test.sh from /tmp dir (130/130 pass), grep -RnE 'ubiquitous language|bounded context|domain expert|aggregate root' across plugins/flow-next/{skills,scripts/flowctl.py,agents,commands,codex} → PASS (no matches) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.5.json b/.flow/tasks/fn-38-project-glossary-decision-records-and.5.json new file mode 100644 index 00000000..1ffde00a --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.5.json @@ -0,0 +1,17 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-04-30T07:57:54.292901Z", + "depends_on": [ + "fn-38-project-glossary-decision-records-and.1", + "fn-38-project-glossary-decision-records-and.2" + ], + "epic": "fn-38-project-glossary-decision-records-and", + "id": "fn-38-project-glossary-decision-records-and.5", + "priority": null, + "spec_path": ".flow/tasks/fn-38-project-glossary-decision-records-and.5.md", + "status": "todo", + "title": "/flow-next:audit walks glossary terms + decision entries", + "updated_at": "2026-04-30T08:04:43.622539Z" +} diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.5.md b/.flow/tasks/fn-38-project-glossary-decision-records-and.5.md new file mode 100644 index 00000000..6a901fe9 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.5.md @@ -0,0 +1,55 @@ +--- +satisfies: [R12] +--- + +## Description + +Extend `/flow-next:audit` to walk glossary terms (grep code for term + `_Avoid_` aliases; mark stale on absence; surface alias-creep) and decision entries (verify the constraint still holds; prompt for supersession on conflict). 
+ +**Size:** M +**Files:** `plugins/flow-next/skills/flow-next-audit/workflow.md`, `plugins/flow-next/skills/flow-next-audit/phases.md`, possibly `plugins/flow-next/skills/flow-next-audit/SKILL.md`, regenerate Codex mirror + +## Approach + +- **Decision walk is automatic.** Once T1 ships and `MEMORY_CATEGORIES["knowledge"]` includes `"decisions"`, the existing memory-walk in `workflow.md:21-127` (Phase 0) picks up `decisions/` entries. Document calibration in `phases.md`: for decision entries, the per-entry judge asks "does the constraint that motivated this decision still hold?" instead of the generic "is this still relevant?". The 5 outcomes (Keep / Update / Consolidate / Replace / Delete) carry over but `Replace` for decisions means "supersede" — write a new entry pointing at the old via `superseded_by`. +- **Glossary walk is new.** Add a new phase (e.g. Phase 0.5 "Glossary scan") that runs after the memory walk: + - Enumerate glossaries via `flowctl glossary list --json` (preferred — single source of truth, respects nearest-ancestor walk); fallback `find . -name GLOSSARY.md -not -path './node_modules/*' -not -path './.git/*'` for the legacy/non-cwd case. JSON shape: `{groups: [{path, entries: [{term, definition, avoid, relates_to}], count}], file_count, total_terms}`. + - For each term in each file: `grep -rE "\\b<term>\\b" <code-paths>` (code-paths = tracked source files; exclude `.flow/`, `node_modules/`, build artifacts; respect `.gitignore` via `git ls-files | xargs grep`). Match terms case-insensitively whitespace-collapsed (matches T2's `_glossary_term_matches` invariant). + - Zero hits AND zero `_Avoid_` alias hits → mark stale via Edit tool on the glossary file with a stale-marker HTML comment after the term heading. **fn-38.2 did NOT ship a `flowctl glossary mark-stale` subcommand** — Edit-tool path is the only option. The agent must NOT delete the term entry on stale-detection (deletion is the operator's call).
+ - **Husk awareness**: glossaries with `count: 0` (file is `# Glossary` H1 husk after last-term-removal) skip the walk — no terms to audit. Surface a Phase 3 advisory: "GLOSSARY.md at <path> is an empty husk; remove the file manually if it's no longer needed (T2 keeps it as project-state per R18)." + - `_Avoid_` alias creeping into new code (alias hits in code; `entries[].avoid` is a `list[str]`) → Phase 3 question: "alias `<alias>` is appearing in code at <file:line>; rename uses to `<term>` or update glossary?" +- Update `SKILL.md` allowed-tools if needed (likely no change — Read/Grep/Glob/Edit already allowed via the existing `allowed-tools` frontmatter). +- Update Phase 0 prose to mention that decisions/ entries are included automatically (post-T1). +- Run `scripts/sync-codex.sh` to regenerate Codex mirror. +- **R17 compliance**: no DDD terminology in skill prose. + +## Investigation targets + +**Required:** +- `plugins/flow-next/skills/flow-next-audit/workflow.md:21-127` — Phase 0 memory walk (decisions auto-included; document) +- `plugins/flow-next/skills/flow-next-audit/workflow.md:131-204` — Phase 1 investigation pattern (template for glossary walk) +- `plugins/flow-next/skills/flow-next-audit/phases.md` — phase reference + outcome calibration table (extend with decision calibration) +- `plugins/flow-next/skills/flow-next-audit/SKILL.md` — top-level skill behavior + +**Optional:** +- `plugins/flow-next/scripts/flowctl.py` — `cmd_memory_mark_stale` precedent if extending to glossary stale-marking + +## Acceptance + +- [ ] `/flow-next:audit` walk includes glossary terms (root + subdir `GLOSSARY.md` files via `flowctl glossary list --json`, raw `find` as fallback) +- [ ] For each term: grep tracked code files for term + `_Avoid_` aliases; absence on both → marked/flagged stale via Edit tool with a stale-marker HTML comment after the term heading (no `flowctl glossary mark-stale` exists post-T2) +- [ ] `_Avoid_` alias appearing in code surfaces as a Phase 3 question (interactive) or stale-flag (autofix) +- [ ] Decisions track is automatically walked
once schema extension lands (T1 dep verified) +- [ ] Decision-entry per-entry judge calibrates on "constraint still holds?" — `phases.md` documents this +- [ ] `Replace` outcome for decisions = write new entry with `superseded_by: `; old entry's `decision_status` set to `superseded` +- [ ] `phases.md` documents the new glossary phase + decision calibration +- [ ] `scripts/sync-codex.sh` regenerates Codex mirror cleanly +- [ ] No DDD jargon in skill prose (R17) +- [ ] Manual smoke: run `/flow-next:audit` on a project with stale glossary term + outdated decision; both surface in the report + +## Done summary +Extended `/flow-next:audit` to walk glossary terms (new Phase 0.5: `flowctl glossary list --json` enumeration, code-grep per term + `_Avoid_` aliases, Edit-tool stale comment on absence, alias-creep surfacing, husk awareness) and decision entries (automatic via T1's `MEMORY_CATEGORIES["knowledge"]` extension, with calibrated judging question and supersede-not-delete Replace flow). Pure prose changes across `SKILL.md`, `workflow.md`, `phases.md`; Codex mirror regenerated; sync-codex.sh + audit_smoke_test.sh (41/41) + glossary_smoke_test.sh (80/80) + R17 grep guard all green. 
+## Evidence +- Commits: ffd9535094cb4216232d2a0b69dea2405250ecd8 +- Tests: ./scripts/sync-codex.sh (clean: 21 skills, 21 agents, hooks.json), plugins/flow-next/scripts/audit_smoke_test.sh (41/41 PASS), plugins/flow-next/scripts/glossary_smoke_test.sh (80/80 PASS), grep -RnE 'ubiquitous language|bounded context|domain expert|aggregate root' across plugins/flow-next/{skills,agents,commands,codex} + flowctl.py → RC=1 (no matches, R17 clean) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.6.json b/.flow/tasks/fn-38-project-glossary-decision-records-and.6.json new file mode 100644 index 00000000..ccd99724 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.6.json @@ -0,0 +1,17 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-04-30T07:57:54.439279Z", + "depends_on": [ + "fn-38-project-glossary-decision-records-and.1", + "fn-38-project-glossary-decision-records-and.2" + ], + "epic": "fn-38-project-glossary-decision-records-and", + "id": "fn-38-project-glossary-decision-records-and.6", + "priority": null, + "spec_path": ".flow/tasks/fn-38-project-glossary-decision-records-and.6.md", + "status": "todo", + "title": "/flow-next:sync detects glossary renames + decision overrides", + "updated_at": "2026-04-30T08:04:43.779395Z" +} diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.6.md b/.flow/tasks/fn-38-project-glossary-decision-records-and.6.md new file mode 100644 index 00000000..f3ffd8b4 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.6.md @@ -0,0 +1,44 @@ +--- +satisfies: [R13] +--- + +## Description + +Extend `/flow-next:sync` (plan-sync) drift detection to catch glossary-term renames (term in old spec, new term in current code) and implicit decision overrides (current code violates a decision constraint), updating downstream task specs accordingly. 
+ +**Size:** M +**Files:** `plugins/flow-next/skills/flow-next-sync/SKILL.md`, `plugins/flow-next/agents/plan-sync.md`, regenerate Codex mirror + +## Approach + +- **Sync skill (`flow-next-sync/SKILL.md:97-117`)** — extend Step 5 (Spawn Plan-Sync Agent) to pass two new context types to the agent: + 1. **Glossary state**: `flowctl glossary list --json` output (all defined terms, root + subdirs). JSON shape (per fn-38.2): `{groups: [{path, entries: [{term, definition, avoid, relates_to}], count}], file_count, total_terms}`. Empty husks (`count: 0`) carry no signal — skip them. + 2. **Decision constraints**: `flowctl memory list --track knowledge --category decisions --json` output (all active decisions with their `Consequences` sections) +- **Plan-sync agent (`agents/plan-sync.md:85-103`)** — extend drift-detection prose: + - **Glossary-term renames**: when an old task spec or epic spec references a term, but the current code uses a different term (matching one of the old term's `_Avoid_` aliases — `entries[].avoid` is a `list[str]` per fn-38.2's parser), flag the spec for update. Update downstream task spec wording to use the canonical term. Term match uses the same case-insensitive whitespace-collapsed rule as `_glossary_term_matches` in flowctl.py — do NOT reinvent. + - **Decision overrides**: when current code touches files referenced in an active decision's `Consequences` section in a way that contradicts the decision (e.g. decision says "we use REST not GraphQL" + current code introduces a `/graphql` endpoint), flag the decision id in the sync report. Do NOT auto-supersede; surface for user review. +- Run `scripts/sync-codex.sh` to regenerate `plugins/flow-next/codex/agents/plan-sync.toml`. +- **R17 compliance**: no DDD jargon in skill or agent prose. 
+ +## Investigation targets + +**Required:** +- `plugins/flow-next/skills/flow-next-sync/SKILL.md:97-117` — Step 5 (Spawn Plan-Sync Agent) extension point +- `plugins/flow-next/agents/plan-sync.md:85-103` — drift-detection phase prose +- `plugins/flow-next/codex/agents/plan-sync.toml` — Codex mirror (auto-regen) + +## Acceptance + +- [ ] Sync skill passes glossary-list and decisions-list context to plan-sync agent +- [ ] Plan-sync agent's drift detection catches glossary-term renames (term in old spec, alias in current code) and updates downstream specs +- [ ] Plan-sync agent surfaces decision-override candidates (current code touches files referenced in a decision's `Consequences`) WITHOUT auto-superseding — surface for user review only +- [ ] `scripts/sync-codex.sh` regenerates Codex mirror cleanly +- [ ] No DDD jargon in prose (R17) +- [ ] Manual smoke: run `/flow-next:sync` on a branch with a glossary-term rename in code; verify spec updates land. Run on a branch that violates a decision constraint; verify the decision id surfaces in the report. + +## Done summary +Extended `/flow-next:sync` and the `plan-sync` agent to surface drift signals from project glossaries and active decision records. Sync skill now gathers `flowctl glossary list --json` + `flowctl memory list --track knowledge --category decisions --json` and passes both as `GLOSSARY_JSON`/`DECISIONS_JSON` to the agent; plan-sync gains Phase 3b which (a) renames `_Avoid_` aliases in downstream specs to canonical terms with a breadcrumb when the completed task's code uses the canonical term, and (b) flags decision ids whose `Consequences`-referenced files were changed in a way that contradicts the decision — read-only, no auto-supersession. Husks and superseded decisions are skipped. Codex mirror regenerated; R17 clean; all six smoke suites green (477/477 combined).
+## Evidence +- Commits: 43db08b955a23d9205b9da636e508d4814eeb8ac +- Tests: scripts/sync-codex.sh (clean, 21 skills + 21 agents + hooks), grep -RnE 'ubiquitous language|bounded context|domain expert|aggregate root' across plugins/flow-next/{skills,agents,commands,scripts/flowctl.py} (R17 PASS), plugins/flow-next/scripts/smoke_test.sh (130/130 pass), plugins/flow-next/scripts/glossary_smoke_test.sh (80/80 pass), plugins/flow-next/scripts/audit_smoke_test.sh (41/41 pass), plugins/flow-next/scripts/prospect_smoke_test.sh (94/94 pass), plugins/flow-next/scripts/resolve-pr_smoke_test.sh (58/58 pass), plugins/flow-next/scripts/impl-review_smoke_test.sh (74/74 pass) +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.7.json b/.flow/tasks/fn-38-project-glossary-decision-records-and.7.json new file mode 100644 index 00000000..3b9dd1d2 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.7.json @@ -0,0 +1,16 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-04-30T07:57:54.587640Z", + "depends_on": [ + "fn-38-project-glossary-decision-records-and.3" + ], + "epic": "fn-38-project-glossary-decision-records-and", + "id": "fn-38-project-glossary-decision-records-and.7", + "priority": null, + "spec_path": ".flow/tasks/fn-38-project-glossary-decision-records-and.7.md", + "status": "todo", + "title": "R17 terminology grep guard (no DDD jargon)", + "updated_at": "2026-04-30T08:09:18.029974Z" +} diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.7.md b/.flow/tasks/fn-38-project-glossary-decision-records-and.7.md new file mode 100644 index 00000000..b230df26 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.7.md @@ -0,0 +1,97 @@ +--- +satisfies: [R4, R17] +--- + +## Description + +Automated test that fails if any DDD terminology ("ubiquitous language", "bounded context", "domain expert", "aggregate root", or equivalent) appears 
in skill prose, agent definitions, flowctl Python source, or user-facing slash command files. Also greps for meta-file references (`GLOSSARY-MAP.md`, `CONTEXT-MAP.md`) per R4 cross-coverage. + +**Two-tier check** to mirror the existing `AskUserQuestion` / `ToolSearch` split: +- `ci_test.sh` greps **canonical** files (skills/, agents/, commands/, flowctl.py) +- `scripts/sync-codex.sh` validation block greps the **Codex mirror** (`plugins/flow-next/codex/`) + +**Size:** S +**Files:** `plugins/flow-next/scripts/ci_test.sh`, `scripts/sync-codex.sh` + +## Approach + +### Part 1 — `ci_test.sh` (canonical scan) + +Add a test block (placement: near existing memory or smoke-test sections): + +```bash +# R17: no DDD jargon in canonical user-facing prose +HITS=$(grep -RnE 'ubiquitous language|bounded context|domain expert|aggregate root' \ + plugins/flow-next/skills \ + plugins/flow-next/scripts/flowctl.py \ + plugins/flow-next/agents \ + plugins/flow-next/commands 2>/dev/null || true) +if [[ -n "$HITS" ]]; then + echo "FAIL: DDD jargon detected in canonical:"; echo "$HITS"; exit 1 +fi + +# R4: no meta-file precedent leaks into canonical prose +META_HITS=$(grep -RnE 'GLOSSARY-MAP\.md|CONTEXT-MAP\.md' \ + plugins/flow-next/skills \ + plugins/flow-next/scripts/flowctl.py \ + plugins/flow-next/agents \ + plugins/flow-next/commands 2>/dev/null || true) +if [[ -n "$META_HITS" ]]; then + echo "FAIL: meta-file references detected in canonical:"; echo "$META_HITS"; exit 1 +fi +``` + +Match existing `ci_test.sh` test-block style (`set -euo pipefail`, fail-on-hit pattern). + +### Part 2 — `scripts/sync-codex.sh` (mirror scan) + +Extend the validation block at `scripts/sync-codex.sh:760-770` (alongside the existing `AskUserQuestion` / `ToolSearch` check) with: + +```bash +# Check no DDD terminology in codex mirror prose. Canonical clean + +# mechanical rewrite should keep mirror clean, but a derived artifact +# deserves its own validation. 
+ddd_refs=$( { grep -rE 'ubiquitous language|bounded context|domain expert|aggregate root' "$CODEX_DIR/skills/" "$CODEX_DIR/agents/" 2>/dev/null || true; } | { grep -v '/templates/' || true; } | wc -l | tr -d ' ') +if [ "$ddd_refs" -gt 0 ]; then + echo -e " ${RED}✗${NC} $ddd_refs DDD terminology refs in codex mirror — clean canonical first" + fail=1 +fi + +# R4 mirror cross-coverage +meta_refs=$( { grep -rE 'GLOSSARY-MAP\.md|CONTEXT-MAP\.md' "$CODEX_DIR/skills/" "$CODEX_DIR/agents/" 2>/dev/null || true; } | { grep -v '/templates/' || true; } | wc -l | tr -d ' ') +if [ "$meta_refs" -gt 0 ]; then + echo -e " ${RED}✗${NC} $meta_refs meta-file refs in codex mirror" + fail=1 +fi +``` + +T3-T6 already mandate `scripts/sync-codex.sh runs clean` in their acceptance — extending the script's validation is in scope for this task because the validation block is shared infrastructure, not skill-specific prose. + +### Compatibility note + +The grep guard runs in <1s per scan. T7 must ship AFTER T3 (skill prose changes) so the canonical is clean by the time the guard runs. T7 depends on T3 already. 
+ +## Investigation targets + +**Required:** +- `plugins/flow-next/scripts/ci_test.sh` — existing test structure (insertion point) +- `scripts/sync-codex.sh:760-770` — validation block (existing `AskUserQuestion` / `ToolSearch` checks; new DDD/meta checks land alongside) + +**Optional:** +- All skill prose files modified by T3-T6 (verify no DDD jargon before adding the guards) + +## Acceptance + +- [ ] `ci_test.sh` includes DDD-terminology grep guard (canonical scan); passes on current codebase after T3-T6 ship +- [ ] `scripts/sync-codex.sh` validation block includes DDD-terminology grep guard (Codex mirror scan); passes after sync runs +- [ ] Both guards also catch meta-file references (`GLOSSARY-MAP.md`, `CONTEXT-MAP.md`) per R4 +- [ ] Failure messages list offending files + lines (canonical) or count + remediation hint (mirror) +- [ ] Each guard runs in <2s (trivially — pure grep on a small file set) +- [ ] CI passes after this task lands (entire epic-touched canonical + mirror is clean) + +## Done summary +T7 ships the two-tier R17 grep guard: ci_test.sh scans canonical (skills/, agents/, commands/, flowctl.py) for DDD forbidden vocabulary + R4 meta-file refs (file:line on hit); sync-codex.sh validation block scans the Codex mirror with the same patterns (count + remediation hint on hit). Verified by fixture-injection that both tiers fire and that the current epic-touched canonical + mirror are clean. +## Evidence +- Commits: b872b2b5f19402130e024c02be8b516613b55684 +- Tests: plugins/flow-next/scripts/ci_test.sh (56 pass / 0 fail incl. 
new R17 + R4 guards), scripts/sync-codex.sh (validation green: R17 mirror + R4 mirror added), fixture-injection verification: canonical fired with file:line; mirror fired with 3 R17 + 1 R4 hit, exit 1 +- PRs: \ No newline at end of file diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.8.json b/.flow/tasks/fn-38-project-glossary-decision-records-and.8.json new file mode 100644 index 00000000..62216c75 --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.8.json @@ -0,0 +1,21 @@ +{ + "assignee": null, + "claim_note": "", + "claimed_at": null, + "created_at": "2026-04-30T07:57:54.738972Z", + "depends_on": [ + "fn-38-project-glossary-decision-records-and.1", + "fn-38-project-glossary-decision-records-and.2", + "fn-38-project-glossary-decision-records-and.3", + "fn-38-project-glossary-decision-records-and.4", + "fn-38-project-glossary-decision-records-and.5", + "fn-38-project-glossary-decision-records-and.6" + ], + "epic": "fn-38-project-glossary-decision-records-and", + "id": "fn-38-project-glossary-decision-records-and.8", + "priority": null, + "spec_path": ".flow/tasks/fn-38-project-glossary-decision-records-and.8.md", + "status": "todo", + "title": "Documentation rollup (CLAUDE.md, README, CHANGELOG, usage, flowctl docs)", + "updated_at": "2026-04-30T08:04:44.087354Z" +} diff --git a/.flow/tasks/fn-38-project-glossary-decision-records-and.8.md b/.flow/tasks/fn-38-project-glossary-decision-records-and.8.md new file mode 100644 index 00000000..3563cb2a --- /dev/null +++ b/.flow/tasks/fn-38-project-glossary-decision-records-and.8.md @@ -0,0 +1,82 @@ +## Description + +Update all in-scope contributor docs to reflect the new glossary, decisions category, and doc-aware interview mode. Per CLAUDE.md "Contributing / Development" section: in-scope = CHANGELOG.md, plugins/flow-next/README.md, CLAUDE.md, .flow/usage.md, plus the plugin's flowctl docs reference + root README's what's-new callout. 
Maintainer-only (mickel.tech website) is skipped per convention. + +**Size:** M +**Files:** +- `CHANGELOG.md` (new `[flow-next 0.39.0]` block) +- `CLAUDE.md` (root — commands list, memory system block, new Glossary section) +- `plugins/flow-next/README.md` (what's-new callout, commands table, memory tree, frontmatter schema, new Project Glossary section, audit/sync/scout extensions noted) +- `README.md` (root — what's-new callout) +- `.flow/usage.md` (new Glossary section) +- `plugins/flow-next/docs/flowctl.md` (memory category list, new glossary subcommand reference section) +- `plugins/flow-next/agents/memory-scout.md` (knowledge-category list at line ~22 enumerates 5 categories in prose; add `decisions`) + + + +## Approach + +Per `docs-gap-scout` findings (gathered during planning), update in this order: + +1. **CHANGELOG.md** — new `## [flow-next 0.39.0] - YYYY-MM-DD` block above 0.38.2. Sections: + - `### Added`: GLOSSARY.md artifact + flowctl glossary subcommands; knowledge/decisions/ category + frontmatter fields; /flow-next:interview --docs/--no-docs autodetect; doc-aware behaviors (a/b/c/d). + - `### Changed`: docs-gap-scout extends scan; /flow-next:audit walks glossary terms + decisions; /flow-next:sync detects glossary renames AND surfaces decision overrides read-only (never auto-supersedes; husks + superseded entries skipped); two-tier R17 + R4 grep guard added (canonical `ci_test.sh` section 5c scans skills/agents/commands/flowctl.py with file:line on hit; mirror `sync-codex.sh` validation block scans `plugins/flow-next/codex/` with count + remediation hint). + - `### Notes`: closed-epic foundations (fn-30 / fn-34 / fn-36 / fn-15-96t); explicitly call out R18 (uninstall survival) since it's a tenet-driven design choice. + +2. **CLAUDE.md** (lines 19-27, 65-86, ~97): + - Commands list: `/flow-next:interview` bullet gains `--docs` / `--no-docs` + autodetect description. 
+ - Memory system block: add `decisions` to knowledge category list; document optional decision-specific fields. + - **New** Glossary section after Prospecting block: GLOSSARY.md placement, `flowctl glossary` subcommands, nearest-ancestor resolution rule, R18 (survives uninstall). + +3. **plugins/flow-next/README.md** (lines 24, 56, 1499, 1508, 1549, 1618, 1687, 1691, 1814, 1979, 2093): + - What's-new callout (line ~24): v0.39.0 one-liner. + - Commands table interview row: doc-aware mode mentioned (autodetect + `--docs`/`--no-docs` flags + four behaviors a-d). Note the new `## Glossary Conflicts` spec section that NEW-IDEA / EXISTING-EPIC interview templates now emit when behavior (a) fires — sits next to `## Resolved via Codebase` as the audit trail for canonical-vs-user wording resolutions. + - Memory system directory tree (line ~1525): add `decisions/`. + - Frontmatter schema: add decision-specific optional fields. + - **New** Project Glossary section after Memory System. + - Audit lifecycle: extended (glossary + decisions walks). + - Plan-sync: extended (glossary renames replace `_Avoid_` aliases with canonical terms via Phase 3b.1 with `` breadcrumb; decision overrides surfaced read-only via Phase 3b.2 under "Decision overrides flagged for review" heading — never auto-supersedes). + - Planning Phase docs-gap-scout bullet: extended scan list. + +4. **README.md (root)** — what's-new callout (lines 28-32) updated to v0.39.0 with one-line summary (glossary + decisions + doc-aware interview). + +5. **.flow/usage.md** — new `# Glossary (v0.39.0+)` section after memory commands block (line ~86), one-liner per `flowctl glossary` subcommand. + +6. **plugins/flow-next/docs/flowctl.md** — memory category list (line ~505) adds `decisions`; new `### glossary` section at end (after the `prospect` section ~line 591) documenting the four subcommands shipped in fn-38.2 (`add`, `list`, `read`, `remove`) + `--definition-file` variant on `add`. 
Note (per fn-38.2 implementation): `add` supports `--avoid`/`--relates-to` (comma-separated), upserts case-insensitively; `list --json` shape is `{groups: [{path, entries, count}], file_count, total_terms}`; `read --json` shape is `{path, term, definition, avoid, relates_to}`; last-term `remove` leaves a `# Glossary` H1 husk on disk (R18 — never deletes the file). No `mark-stale` subcommand was shipped in T2 — `/flow-next:audit` uses Edit-tool stale-marking. + + + +7. **plugins/flow-next/agents/memory-scout.md** (line ~22) — knowledge-track category list in agent prose currently enumerates 5 categories (`architecture-patterns`, `conventions`, `tooling-decisions`, `workflow`, `best-practices`). Add `decisions`. Surfaced by T1 worker. + +8. **R17 compliance**: no DDD jargon in any updated docs (manual review; T7 grep guard catches regressions). +9. **No website changes** — `~/work/mickel.tech/app/apps/flow-next/page.tsx` is maintainer-only per CLAUDE.md convention. Skip. + +## Investigation targets + +**Required:** +- `CLAUDE.md` lines 19-27, 65-86 (commands + memory blocks) +- `plugins/flow-next/README.md` lines 24, 56, 1499-1531, 1549-1561, 1618-1632, 1687-1691, 1814-1842, 1979-1984, 2093-2109 +- `CHANGELOG.md` lines 1-50 (current 0.38.2 entry as shape template) +- `README.md` lines 28-32 (current what's-new callout) +- `.flow/usage.md` lines 76-86 (memory commands block) +- `plugins/flow-next/docs/flowctl.md` lines 505-590 (memory + prospect sections as templates) + +## Acceptance + +- [ ] CHANGELOG.md has `[flow-next 0.39.0]` block covering R1-R18 changes (Added / Changed / Notes structure) +- [ ] CLAUDE.md commands list, memory system block, and new Glossary section all updated +- [ ] plugins/flow-next/README.md: what's-new callout + commands table + memory tree + frontmatter schema + new Project Glossary section + audit/sync/scout extension notes +- [ ] Root README.md what's-new callout updated to v0.39.0 +- [ ] .flow/usage.md gains Glossary section +- [ ] 
plugins/flow-next/docs/flowctl.md adds `decisions` to memory categories list + new glossary subcommand reference section +- [ ] plugins/flow-next/agents/memory-scout.md knowledge-category list (line ~22) includes `decisions` +- [ ] No DDD terminology in any updated docs (R17 — manually verify; T7 grep guard catches future regressions) +- [ ] Maintainer-only website (mickel.tech) NOT updated in this task (per CLAUDE.md convention) +- [ ] Version bump considered: this epic adds skill behavior (interview), so a version bump (likely 0.39.0) is warranted per CLAUDE.md versioning rules. The bump itself is the release-handoff step, not part of this task's acceptance — but the CHANGELOG entry must use the agreed version. + +## Done summary +Documentation rollup for fn-38: added [flow-next 0.39.0] CHANGELOG block, extended CLAUDE.md commands list + memory block + new Glossary section, updated root README + plugins/flow-next/README.md (version badges, what's-new callouts, memory tree adds decisions/, frontmatter schema for decision-specific fields, new Project Glossary section, audit + sync + scout extension notes, interview doc-aware mode + Glossary Conflicts template), added Glossary section to .flow/usage.md, extended plugins/flow-next/docs/flowctl.md with new ### glossary subcommand reference + decisions in memory categories, fixed memory-scout decisions enumeration drift (T1) and epic spec husk-aware autodetect example (T2). Two-tier R17 + R4 grep guards stay green; ci_test (56), smoke (130), glossary smoke (80), sync-codex all pass; mickel.tech website skipped per CLAUDE.md maintainer-only convention. 
+## Evidence +- Commits: a5ff6e46e343f33fb57a37eb632190209b05beb7 +- Tests: plugins/flow-next/scripts/ci_test.sh (56 pass), plugins/flow-next/scripts/smoke_test.sh (130 pass), plugins/flow-next/scripts/glossary_smoke_test.sh (80 pass), scripts/sync-codex.sh (21 skills + 21 agents validated; R17 + R4 mirror green) +- PRs: \ No newline at end of file diff --git a/.flow/usage.md b/.flow/usage.md index 59d1e667..c7d233ea 100644 --- a/.flow/usage.md +++ b/.flow/usage.md @@ -84,6 +84,17 @@ Task tracking for AI agents. All state lives in `.flow/`. .flow/bin/flowctl memory list-legacy # list legacy entries with mechanical defaults (v0.37.0+) .flow/bin/flowctl memory list-legacy --json # used by /flow-next:memory-migrate skill .flow/bin/flowctl memory migrate [--yes] [--json] # deterministic-only legacy migration (use /flow-next:memory-migrate for agent-native classification) + +# Glossary (project-canonical terms at repo root, v0.39.0+ — survives `rm -rf .flow/`) +.flow/bin/flowctl glossary add TERM --definition "..." # upsert single-line term +.flow/bin/flowctl glossary add TERM --definition-file body.md # multi-line definition from file +.flow/bin/flowctl glossary add TERM --definition-file - # multi-line from stdin +.flow/bin/flowctl glossary add TERM --avoid "alt1,alt2" --relates-to "x,y" +.flow/bin/flowctl glossary list # text mode: grouped by file (nearest first) +.flow/bin/flowctl glossary list --json # {groups, file_count, total_terms} +.flow/bin/flowctl glossary read TERM # nearest-ancestor walk; first match wins +.flow/bin/flowctl glossary read TERM --json # {path, term, definition, avoid, relates_to} +.flow/bin/flowctl glossary remove TERM # last-term remove leaves `# Glossary` husk (R18) ``` ## Workflow diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bcad2d1..e03ff1d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,25 @@ All notable changes to the flow-next.
+## [flow-next 0.39.0] - 2026-04-30 + +### Added +- **`GLOSSARY.md` artifact + `flowctl glossary` subcommands.** New first-class human-readable glossary that lives at the repo root (and optional subdirectories) so the project's canonical names + term-conflict resolutions survive `rm -rf .flow/` (R18). H2-per-term markdown format aligns with `open-gitops/documents` and `glossarify-md`. Resolution is nearest-ancestor-walk from cwd up to repo root (first match wins; same shape as `tsconfig.json` / EditorConfig discovery), capped at 32 levels with cycle detection. Subcommands: `flowctl glossary add TERM [--definition ... | --definition-file FILE | -] [--avoid a,b,c] [--relates-to x,y] [--json]` upserts case-insensitively; `glossary list [--json]` returns `{groups: [{path, entries, count}], file_count, total_terms}` grouped by file (nearest first); `glossary read TERM [--json]` walks ancestors and returns `{path, term, definition, avoid, relates_to}`; `glossary remove TERM [--json]` removes the term from the file that defines it. Last-term `remove` leaves a `# Glossary` H1 husk on disk — never deletes the file (R18). New helper functions `find_nearest_glossary` / `find_all_glossaries` / `parse_glossary_file` / `render_glossary_file` / `validate_glossary_entry` / `_glossary_term_matches` / `_glossary_strip_fenced_code` and constants `GLOSSARY_FILE` / `GLOSSARY_WALK_MAX_DEPTH` are reusable from downstream skills via the subcommands rather than direct imports. +- **`knowledge/decisions/` memory category + decision-specific frontmatter fields.** New category alongside `architecture-patterns`, `conventions`, `tooling-decisions`, `workflow`, `best-practices`. Three optional frontmatter fields permitted on any knowledge entry but specifically intended for `decisions/` entries: `decision_status` (enum: `proposed | accepted | superseded`), `superseded_by` (id reference), `alternatives_considered` (free-form prose).
Schema constants exposed: `MEMORY_DECISION_FIELDS` (frozenset) and `MEMORY_DECISION_STATUSES` (enum tuple) live alongside the existing `MEMORY_KNOWLEDGE_FIELDS` / `MEMORY_STATUS` constants. Body convention: 1–3 sentence floor describing trade-offs, irreversibility, and surprise factor. Validator picks up additions automatically via the allowed-fields union. +- **`/flow-next:interview` doc-aware mode.** New autodetect: if `GLOSSARY.md` exists at any ancestor (with at least one term — husks are skipped) or `knowledge/decisions/` has at least one entry, the interview enters doc-aware mode. Override via `--docs` (force on) / `--no-docs` (force off). Four behaviors when active: (a) **glossary lookup before terminology questions** — fetch nearest-ancestor canonical wording via `flowctl glossary read`; surface conflicts as a `## Glossary Conflicts` section in the refined spec when user wording diverges from canonical, with resolution outcome (use-canonical / update-glossary / accept-divergence); (b) **inline glossary write on resolution** — `flowctl glossary add` invoked when the user picks update-glossary, recording the new canonical term; (c) **decision-record awareness** — when a load-bearing architectural choice is made during interview, prompt to write a `knowledge/decisions/` entry with the three-criteria gate (hard-to-reverse / surprising / load-bearing trade-off) and read-back loop before write; (d) **code/spec contradiction surfaced** — when an interview answer conflicts with an active decision record, the contradiction is surfaced in the refined spec rather than silently overwriting either side. The new `## Glossary Conflicts` template section sits alongside the existing `## Resolved via Codebase` section as the audit trail for canonical-vs-user wording resolutions; both are written by `NEW-IDEA` and `EXISTING-EPIC` interview templates. 
+ +### Changed +- **`docs-gap-scout` extends planning-phase scan.** Scout now reads `GLOSSARY.md` at repo root (and walked ancestors when planning a subdirectory feature) plus `.flow/memory/knowledge/decisions/` to surface canonical terminology and prior load-bearing choices in the planning context. Planning-phase output flags terminology mismatches between the proposed feature description and the glossary, and lists relevant decision records the plan should respect. No new acceptance criteria are auto-added — surfaced findings flow into `/flow-next:plan` for human / planner judgment. +- **`/flow-next:audit` walks glossary terms + decision entries.** Phase 0.5 (new) reads every `GLOSSARY.md` on the ancestor chain and audits each term against the current code (any references intact? renamed? gone?). Phase 0.1 (extended) auto-walks `knowledge/decisions/` alongside other categories. Replace outcomes for decision entries are **supersede-not-delete** — the audit writes a new entry with `decision_status: accepted` and sets the old entry's `decision_status: superseded` + `superseded_by: `, preserving the historical trail. Other categories keep the existing Replace semantics. +- **`/flow-next:sync` detects glossary renames + flags decision overrides.** Phase 3b extends the drift sweep: **3b.1** glossary renames replace `_Avoid_` aliases with the canonical term across downstream task specs (additive — old wording is replaced inline with a `` breadcrumb); **3b.2** decision overrides surface read-only under a "Decision overrides flagged for review" heading in the affected task specs. Sync **never auto-supersedes** decision records — superseding is a human-judgment / audit-driven action. Husk and superseded entries are skipped (no work to do; the file_count == 0 OR total_terms == 0 short-circuit prevents false positives). The read-only contract on decisions matches the broader principle that automated drift sweeps should not silently rewrite explicit historical choices. 
+- **Two-tier R17 + R4 grep guard added to CI.** Canonical scan in `plugins/flow-next/scripts/ci_test.sh` section 5c covers `skills/`, `agents/`, `commands/`, and `flowctl.py`; matches print `file:line` for fast remediation. Mirror scan in `scripts/sync-codex.sh` validation block covers `plugins/flow-next/codex/`; matches print a count plus a remediation hint pointing back at the canonical guard. R17 enforces the forbidden-vocabulary list (intentionally only listed inline inside the grep pattern itself; documentation refers to "the R17 forbidden list" without enumeration); R4 forbids early-design meta-file names (`GLOSSARY-MAP.md`, `CONTEXT-MAP.md`) leaking into canonical or mirrored prose. + +### Notes +- **Foundations.** Builds on closed epics fn-30 (categorized memory schema), fn-34 (`/flow-next:audit`), fn-36 (capture + interview grill-me patterns), and the fn-15-96t plan-sync infrastructure. The `decisions/` category extends fn-30's schema additively; the doc-aware interview mode threads through fn-36's lead-with-recommendation + codebase-before-asking patterns; audit + sync extensions reuse fn-34's walk-and-decide framing. +- **R18 — survives uninstall by design.** `GLOSSARY.md` lives at the repo root, NOT inside `.flow/`. Deleting `.flow/` removes task tracking + memory + prospects, but the project's canonical wording stays put. This is a tenet, not an accident: terminology is the project's, not flow-next's. +- **Read-only sync contract.** Plan-sync's decision-override flagging is deliberately read-only. Auto-supersede would be a footgun: the agent might supersede an active decision based on a single conflicting task spec, losing the historical trail. Surface and let the human decide. +- **Smoke coverage:** `glossary_smoke_test.sh` (T2) covers parse / round-trip / nearest-ancestor walk / husk-on-last-remove / 80 assertions. 
`ci_test.sh` section 5c (R17 + R4 canonical) and `scripts/sync-codex.sh` validation block (R17 + R4 mirror) gate canonical and Codex-mirror prose hygiene. + ## [flow-next 0.38.3] - 2026-04-28 ### Changed diff --git a/CLAUDE.md b/CLAUDE.md index 4af3eb66..f12ac0e0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -26,7 +26,7 @@ Commands: - `/flow-next:resolve-pr [PR# | comment URL]` → resolve GitHub PR review threads (fetch → triage → dispatch resolver agents → validate → commit → reply → resolve via GraphQL). User-triggered only; Ralph does not invoke. Flags: `--dry-run`, `--no-cluster`. Parallel dispatch on Claude Code, serial on Codex/Copilot/Droid. Zero runtime deps beyond `gh` + `jq`. Added in 0.34.0. - `/flow-next:audit [mode:autofix] [scope hint]` → agent-native memory staleness review. Walks `.flow/memory/`, reviews each entry against current code, decides per entry: Keep / Update / Consolidate / Replace / Delete. Interactive (asks via blocking-question tool) or autofix (applies unambiguous, marks ambiguous as stale). Skips legacy flat files. The skill IS the agent — no Python engine, no subprocess dispatch. Added in 0.37.0. - `/flow-next:capture [mode:autofix] [--rewrite ] [--from-compacted-ok] [--yes]` → agent-native skill that synthesizes conversation context into a flow-next epic spec at `.flow/specs/.md` via existing `flowctl epic create + epic set-plan` (no new flowctl subcommands). Sits between free-form discussion / prospect-promotion and the formal plan/interview phase — the automated alternative to the manual `flowctl epic create + epic set-plan` heredoc. 
Hard guardrails: source-tagged criteria (`[user]` / `[paraphrase]` / `[inferred]`), mandatory read-back loop with `[inferred]` count, duplicate-epic detection (Phase 0 scans `.flow/epics/` + `flowctl memory search`), compaction detection (refuses without `--from-compacted-ok`), idempotency-via-`--rewrite`, must-ask cases for ambiguous title / untestable acceptance / scope-conflict, suggest-split at 8+ acceptance criteria (never auto-splits). Ralph-blocked. Added in 0.38.0. -- `/flow-next:interview` (enhanced in 0.38.0) folds three patterns from upstream `grill-me`: (a) lead-with-recommendation — every `AskUserQuestion` body includes options summary, recommended option, one-sentence rationale, confidence tier (`[high]` / `[judgment-call]` / `[your-call]`); (b) codebase-before-asking — pre-question taxonomy splits codebase-answerable ("what exists") from user-judgment-required ("what should"); codebase-answerable questions are investigated via Read/Grep/Glob and logged to a `## Resolved via Codebase` spec section; (c) dependency-ordered branches — depth cap 4, discover-as-you-go, surface abandoned branches. +- `/flow-next:interview` (enhanced in 0.38.0) folds three patterns from upstream `grill-me`: (a) lead-with-recommendation — every `AskUserQuestion` body includes options summary, recommended option, one-sentence rationale, confidence tier (`[high]` / `[judgment-call]` / `[your-call]`); (b) codebase-before-asking — pre-question taxonomy splits codebase-answerable ("what exists") from user-judgment-required ("what should"); codebase-answerable questions are investigated via Read/Grep/Glob and logged to a `## Resolved via Codebase` spec section; (c) dependency-ordered branches — depth cap 4, discover-as-you-go, surface abandoned branches. **Doc-aware mode (0.39.0+):** autodetects when `GLOSSARY.md` has at least one term (husks ignored) or `knowledge/decisions/` has entries; override via `--docs` / `--no-docs`. 
Four behaviors: glossary lookup before terminology questions (writes a `## Glossary Conflicts` spec section when user wording diverges from canonical), inline glossary write on resolution (`flowctl glossary add`), decision-record prompt with three-criteria gate + read-back when a load-bearing choice is made, code/spec contradiction surfaced rather than silently overwritten. Review backend spec grammar (v0.31.0+): - `backend[:model[:effort]]` — colon-delimited, trailing parts optional @@ -65,7 +65,8 @@ Ralph (autonomous loop): Memory system (categorized — v0.33.0+): - Config in `.flow/config.json` (NOT Ralph's `config.env`) - Tree under `.flow/memory/`: `bug//*.md` + `knowledge//*.md` (one entry per file) -- YAML frontmatter: `title`, `date`, `track`, `category`, `module`, `tags`, plus track-specific fields (`problem_type` / `root_cause` / `resolution_type` for bug; `applies_when` for knowledge) +- Knowledge categories: `architecture-patterns`, `conventions`, `tooling-decisions`, `workflow`, `best-practices`, `decisions` (the last shipped in 0.39.0 for load-bearing architectural choices — body convention 1–3 sentences on trade-offs, irreversibility, surprise factor) +- YAML frontmatter: `title`, `date`, `track`, `category`, `module`, `tags`, plus track-specific fields (`problem_type` / `root_cause` / `resolution_type` for bug; `applies_when` for knowledge). Decision entries (knowledge track, `decisions` category) may add `decision_status` (proposed | accepted | superseded), `superseded_by` (id), `alternatives_considered` (prose) — additive; permitted on any knowledge entry but specifically intended for the `decisions/` subtree. Constants `MEMORY_DECISION_FIELDS` / `MEMORY_DECISION_STATUSES` (alongside `MEMORY_KNOWLEDGE_FIELDS` / `MEMORY_STATUS`). - Enable: `flowctl config set memory.enabled true` - Init: `flowctl memory init` - Add: `flowctl memory add --track --category --title "..." 
[--module ] [--tags "a,b"] [--body-file ]` @@ -96,6 +97,20 @@ Prospecting (v0.36.0+): - Exit codes: `read`/`promote` on corrupt artifact → 3 (stderr `[ARTIFACT CORRUPT: ]`); duplicate idea on `promote` without `--force` → 2; Ralph-block (`REVIEW_RECEIPT_PATH`/`FLOW_RALPH=1`) on `/flow-next:prospect` → 2. - User-triggered only. Ralph autonomous loop is unaffected — autonomous loops have no business deciding what a repo should tackle next. +Project glossary (v0.39.0+): +- `GLOSSARY.md` lives at the **repo root** (and optionally subdirectories), NOT inside `.flow/`. Survives `rm -rf .flow/` — the project's canonical wording is the project's, not flow-next's (R18 — survives uninstall by design). +- H2-per-term markdown format aligned with `open-gitops/documents` and `glossarify-md` so generic markdown tooling reads it cleanly. +- Resolution: nearest-ancestor walk from cwd up to repo root, first match wins (same shape as `tsconfig.json` / EditorConfig). Cap 32 levels with cycle detection (`GLOSSARY_WALK_MAX_DEPTH`). +- Subcommands: + - `flowctl glossary add [--definition ... | --definition-file FILE | -] [--avoid a,b] [--relates-to x,y] [--json]` — upserts case-insensitively. Creates `GLOSSARY.md` at repo root if no ancestor file exists. + - `flowctl glossary list [--json]` — `{groups: [{path, entries, count}], file_count, total_terms}` grouped by file (nearest first). + - `flowctl glossary read [--json]` — walks ancestors, returns `{path, term, definition, avoid, relates_to}`; first match wins. + - `flowctl glossary remove [--json]` — removes from the file that defines it. Last-term `remove` leaves a `# Glossary` H1 husk on disk (R18 — never deletes the file). +- Husk semantics: `total_terms == 0` (or `file_count == 0`) means no glossary signal. Doc-aware autodetect uses `flowctl glossary list --json | jq '.total_terms > 0'`, NOT `[[ -f GLOSSARY.md ]]` — a plain file-existence check would falsely activate doc-aware mode on an empty husk. 
+- Helpers (callable from Python; downstream skills should prefer flowctl subcommands): `find_nearest_glossary` / `find_all_glossaries` / `parse_glossary_file` / `render_glossary_file` / `validate_glossary_entry` / `_glossary_term_matches` / `_glossary_strip_fenced_code`. Fenced code blocks inside definitions are masked during parse so example terms in code don't get parsed as headings. +- Plan-sync contract: glossary renames replace `_Avoid_` aliases inline with the canonical term + breadcrumb (``); decision-record overrides are surfaced read-only under "Decision overrides flagged for review" — sync **never auto-supersedes** explicit historical choices. +- Forbidden vocabulary (R17): a small list of jargon terms is grep-guarded out of canonical skill / agent / command / flowctl prose by `ci_test.sh` section 5c, and out of the Codex mirror by `scripts/sync-codex.sh` validation block. Two-tier guard prevents drift through either source path. + ### flow Original plugin with optional Beads integration. Plan files in `plans/`. diff --git a/README.md b/README.md index 72b23329..4e400c35 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # Flow-Next [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE) -[![Flow-next](https://img.shields.io/badge/Flow--next-v0.38.3-green)](plugins/flow-next/) +[![Flow-next](https://img.shields.io/badge/Flow--next-v0.39.0-green)](plugins/flow-next/) [![Docs](https://img.shields.io/badge/Docs-📖-informational)](plugins/flow-next/README.md) [![Author](https://img.shields.io/badge/Author-Gordon_Mickel-orange)](https://mickel.tech) @@ -25,7 +25,7 @@ Flow-Next is an AI agent orchestration plugin. **Sixteen agent-native skills** f First-class on **Claude Code**, **OpenAI Codex** (CLI + Desktop), and **Factory Droid**. Also runs on **OpenCode** via the [community port](https://github.com/gmickel/flow-next-opencode). 
-> 🆕 **v0.38.0 — Capture + interview grill-me.** New `/flow-next:capture` synthesizes free-form discussion into a flow-next epic spec with source-tagged criteria + mandatory read-back. `/flow-next:interview` now leads with recommendations + confidence tiers, investigates the codebase before asking, walks decision trees in dependency order. Cross-platform tool handling moved into the Codex sync script — canonical skills stay Claude-native, sync rewrites for Codex mirror. [Full changelog](CHANGELOG.md). +> 🆕 **v0.39.0 — Project glossary + decision records + doc-aware interview.** New `GLOSSARY.md` artifact at the repo root (survives `rm -rf .flow/`) with `flowctl glossary add/list/read/remove` and nearest-ancestor walk. New `knowledge/decisions/` memory category with `decision_status` lifecycle (proposed → accepted → superseded). `/flow-next:interview` autodetects doc-aware mode (`--docs` / `--no-docs` to override) — looks up canonical terms before asking, surfaces conflicts to a `## Glossary Conflicts` spec section, prompts for decision records on load-bearing choices, surfaces code/spec contradictions instead of silently overwriting. `/flow-next:audit` walks glossary + decisions; `/flow-next:sync` flags decision overrides read-only (never auto-supersedes). [Full changelog](CHANGELOG.md). > 🌐 **[Visual overview at mickel.tech/apps/flow-next](https://mickel.tech/apps/flow-next)** — diagrams, examples, the full feature tour. diff --git a/plugins/flow-next/.claude-plugin/plugin.json b/plugins/flow-next/.claude-plugin/plugin.json index 0edf22e9..9b703db1 100644 --- a/plugins/flow-next/.claude-plugin/plugin.json +++ b/plugins/flow-next/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "flow-next", - "version": "0.38.3", + "version": "0.39.0", "description": "Zero-dependency planning + execution with .flow/ task tracking and Ralph autonomous mode (multi-model review gates). Worker subagent per task for context isolation. 
Prime assesses 8 pillars (48 criteria) with GitHub API integration. Includes 21 subagents, 13 commands, 18 skills.", "author": { "name": "Gordon Mickel", diff --git a/plugins/flow-next/.codex-plugin/plugin.json b/plugins/flow-next/.codex-plugin/plugin.json index 710cb64d..3b10f83b 100644 --- a/plugins/flow-next/.codex-plugin/plugin.json +++ b/plugins/flow-next/.codex-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "flow-next", - "version": "0.38.3", + "version": "0.39.0", "description": "Zero-dependency planning + execution with .flow/ task tracking and Ralph autonomous mode. Worker subagent per task for context isolation. Compatible with Codex, Claude Code, and Factory Droid.", "author": { "name": "Gordon Mickel", diff --git a/plugins/flow-next/README.md b/plugins/flow-next/README.md index bf555248..4bdfdc71 100644 --- a/plugins/flow-next/README.md +++ b/plugins/flow-next/README.md @@ -6,7 +6,7 @@ [![Claude Code](https://img.shields.io/badge/Claude_Code-Plugin-blueviolet)](https://claude.ai/code) [![OpenAI Codex](https://img.shields.io/badge/OpenAI_Codex-Plugin-10a37f)](https://developers.openai.com/codex/cli/) -[![Version](https://img.shields.io/badge/Version-0.38.3-green)](../../CHANGELOG.md) +[![Version](https://img.shields.io/badge/Version-0.39.0-green)](../../CHANGELOG.md) [![Status](https://img.shields.io/badge/Status-Active_Development-brightgreen)](../../CHANGELOG.md) [![Discord](https://img.shields.io/badge/Discord-Join-5865F2?logo=discord&logoColor=white)](https://discord.gg/f3DYq8AAm5) @@ -21,9 +21,9 @@ 🌐 **Prefer a visual overview?** See the [Flow-Next app page](https://mickel.tech/apps/flow-next) for diagrams and examples. -> **What's new in 0.38.0:** `/flow-next:capture` synthesizes free-form discussion into a flow-next epic spec with source-tagged criteria + mandatory read-back. `/flow-next:interview` enhanced with lead-with-recommendation + confidence tiers + codebase-first investigation + dependency-ordered question branches. 
Cross-platform tool handling moved into the Codex sync script; canonical skills stay Claude-native, sync rewrites for Codex mirror. [Full changelog](../../CHANGELOG.md). +> **What's new in 0.39.0:** Project glossary + decision records + doc-aware interview. New `GLOSSARY.md` artifact at the repo root (survives `rm -rf .flow/`) with `flowctl glossary add/list/read/remove` and nearest-ancestor walk. New `knowledge/decisions/` memory category with `decision_status` lifecycle. `/flow-next:interview` autodetects doc-aware mode (`--docs` / `--no-docs` to override) — looks up canonical terms before asking, surfaces conflicts to a `## Glossary Conflicts` spec section, prompts for decision records on load-bearing choices. `/flow-next:audit` walks glossary + decisions; `/flow-next:sync` flags decision overrides read-only (never auto-supersedes). Two-tier R17 + R4 grep guard added in CI. [Full changelog](../../CHANGELOG.md). > -> Recent highlights: agent-native [memory audit](#memory-system) (0.37.0), [memory migrate skill](#memory-system) (0.37.0), [PR feedback resolver](#pr-feedback-resolution) (0.34.0), [prospect skill](#prospecting) for ranked candidate ideation (0.36.0), [opt-in review flags](#cross-model-reviews) `--validate` / `--deep` / `--interactive` (0.35.0). +> Recent highlights: [capture skill](#capture) for conversation-to-spec synthesis (0.38.0), [interview grill-me patterns](#flow-nextinterview) (0.38.0), agent-native [memory audit](#memory-system) (0.37.0), [memory migrate skill](#memory-system) (0.37.0), [PR feedback resolver](#pr-feedback-resolution) (0.34.0), [prospect skill](#prospecting) for ranked candidate ideation (0.36.0). 
--- @@ -37,6 +37,7 @@ - [Prospecting](#prospecting) — `/flow-next:prospect` - [Capture](#capture) — `/flow-next:capture` - [Memory System](#memory-system) — `/flow-next:audit` + `/flow-next:memory-migrate` +- [Project Glossary](#project-glossary) — `flowctl glossary` + doc-aware interview - [Agent Readiness Assessment](#agent-readiness-assessment) — `/flow-next:prime` - [PR Feedback Resolution](#pr-feedback-resolution) — `/flow-next:resolve-pr` - [Cross-Model Reviews](#cross-model-reviews) — RepoPrompt / Codex / Copilot @@ -1505,6 +1506,8 @@ When enabled, plan-sync also checks other open epics for stale references. Usefu Manual sync ignores `planSync.enabled` config—if you run it, you want it. Works with any source task status (not just done). +**Sync extensions (v0.39.0+):** Phase 3b extends the drift sweep with two additions. **3b.1 glossary renames** replace `_Avoid_` aliases with the canonical term across downstream task specs (additive — old wording is replaced inline with a `` breadcrumb). **3b.2 decision overrides** are surfaced read-only under a `Decision overrides flagged for review` heading in the plan-sync summary report (decision entries themselves are never edited) — sync **never auto-supersedes** decision records, since superseding is a human-judgment / audit-driven action. Husk and superseded entries are skipped (no work to do; the `file_count == 0` OR `total_terms == 0` short-circuit prevents false positives on empty husks). The read-only contract on decisions matches the broader principle that automated drift sweeps should not silently rewrite explicit historical choices. + ### Memory System (Opt-in, categorized — v0.33.0+) Persistent learnings that survive context compaction.
One entry per file, YAML f ├── conventions/ ├── tooling-decisions/ ├── workflow/ - └── best-practices/ + ├── best-practices/ + └── decisions/ # v0.39.0+ — load-bearing architectural choices ``` **Frontmatter schema (bug track):** @@ -1560,6 +1564,26 @@ applies_when: writing Ralph loop scripts or review shims --- ``` +**Frontmatter schema (decisions — knowledge track, v0.39.0+):** + +```yaml +--- +title: Use nearest-ancestor walk for GLOSSARY.md resolution +date: 2026-04-30 +track: knowledge +category: decisions +module: glossary +tags: [glossary, resolution, walk] +decision_status: accepted # proposed | accepted | superseded +alternatives_considered: | + - always-root: simpler, but loses subdir flexibility + - explicit-path: makes resolution opaque to skills +superseded_by: null # set when decision_status = superseded +--- +``` + +Decision body convention: 1–3 sentence floor describing trade-offs, irreversibility, and surprise factor. The three decision-specific fields (`decision_status`, `superseded_by`, `alternatives_considered`) are permitted on any knowledge entry but specifically intended for the `decisions/` subtree. Constants `MEMORY_DECISION_FIELDS` / `MEMORY_DECISION_STATUSES` (alongside `MEMORY_KNOWLEDGE_FIELDS` / `MEMORY_STATUS`). + **Enable + init:** ```bash @@ -1619,6 +1643,8 @@ Search scoring is weighted: title 5×, tags 3×, body 1.5×, misc 1×. Legacy hi `/flow-next:audit [mode:autofix] [scope hint]` walks `.flow/memory/`, reviews each entry against the current codebase, and decides per entry whether to **Keep / Update / Consolidate / Replace / Delete**. Interactive mode (default) asks via the platform's blocking-question tool; autofix mode applies unambiguous actions and marks ambiguous entries as stale. The skill is agent-native — host agent reads the workflow markdown and executes it directly using its own Read/Grep/Glob tools (no Python audit engine, no codex/copilot subprocess dispatch). Legacy flat files are skipped with a warning. 
+**Audit extensions (v0.39.0+):** Phase 0.5 (new) reads every `GLOSSARY.md` on the ancestor chain and audits each term against the current code (any references intact? renamed? gone?). Phase 0.1 (extended) auto-walks `knowledge/decisions/` alongside other categories. **Replace outcomes for decision entries are supersede-not-delete** — the audit writes a new entry with `decision_status: accepted` and sets the old entry's `decision_status: superseded` + `superseded_by: `, preserving the historical trail. Other categories keep the existing Replace semantics. + Two flowctl helpers back the audit lifecycle (also callable directly): ```bash @@ -1686,6 +1712,50 @@ Until migration runs, legacy flat files continue to work; `list` / `read` / `sea --- +## Project Glossary + +`GLOSSARY.md` is a human-readable, project-canonical terminology file shipped in v0.39.0. Lives at the **repo root** (and optionally subdirectories), NOT inside `.flow/`. Survives `rm -rf .flow/` — terminology is the project's, not flow-next's. + +**Format:** H2-per-term markdown aligned with `open-gitops/documents` and `glossarify-md` so generic markdown tooling reads it cleanly. Optional `_Avoid_:` and `_Relates to_:` italic lines surface aliases and cross-references. Multi-line definitions are supported; fenced code blocks inside definitions are masked during parse so example terms in code don't get parsed as headings. + +**Resolution:** Nearest-ancestor walk from cwd up to repo root, first match wins (same shape as `tsconfig.json` / EditorConfig). Capped at 32 levels with cycle detection. + +**Subcommands:** + +```bash +# Add or update a term — single-line, file, or stdin +flowctl glossary add --definition "Short definition." +flowctl glossary add --definition-file body.md +flowctl glossary add --definition-file - + +# Optional alias / relates-to flags +flowctl glossary add --definition "..." 
--avoid "alt1,alt2" --relates-to "x,y" + +# List defined terms (grouped by file, nearest first) +flowctl glossary list # text mode +flowctl glossary list --json # {groups, file_count, total_terms} + +# Read a term — walks ancestors, first match wins +flowctl glossary read +flowctl glossary read --json # {path, term, definition, avoid, relates_to} + +# Remove a term — last-term remove leaves a `# Glossary` H1 husk on disk +flowctl glossary remove +``` + +**Husk semantics:** Last-term `remove` leaves a `# Glossary` H1 husk on disk — the file is **never** deleted. R18 (survives uninstall) covers both the file living outside `.flow/` AND the file persisting after the last term is removed. Doc-aware autodetect should branch on `total_terms > 0`, not on `[[ -f GLOSSARY.md ]]` — the latter would falsely activate doc-aware mode on an empty husk. + +**How the rest of flow-next uses it:** + +- **`/flow-next:interview`** doc-aware mode (autodetect when `total_terms > 0` or `knowledge/decisions/` is non-empty): looks up canonical wording before terminology questions; surfaces user-vs-canonical conflicts to a `## Glossary Conflicts` spec section; writes new terms via `flowctl glossary add` when the user picks update-glossary; prompts for `knowledge/decisions/` entries on load-bearing choices. +- **`/flow-next:audit`** Phase 0.5: walks every `GLOSSARY.md` on the ancestor chain and audits each term against the current code (any references intact? renamed? gone?). +- **`/flow-next:sync`** Phase 3b.1: glossary renames replace `_Avoid_` aliases with the canonical term inline across downstream task specs, with a `` breadcrumb. +- **`docs-gap-scout`** in the planning phase: reads `GLOSSARY.md` on the ancestor chain to surface canonical terminology in the planning context; flags terminology mismatches between the proposed feature description and the glossary.
+ +**Forbidden vocabulary (R17):** A small list of jargon terms is grep-guarded out of canonical skill / agent / command / flowctl prose by `ci_test.sh` section 5c (canonical scan, prints `file:line` on hit), and out of the Codex mirror by `scripts/sync-codex.sh` validation block (mirror scan, prints count + remediation hint). The forbidden list is enumerated only inside the grep pattern itself; documentation refers to "the R17 forbidden list" without re-enumeration to avoid teaching the very vocabulary it's meant to suppress. + +--- + ## Commands Sixteen commands, complete workflow: @@ -1696,7 +1766,7 @@ Sixteen commands, complete workflow: | `/flow-next:capture [flags]` | Synthesize conversation context into an epic spec; source-tagged + mandatory read-back ([details](#capture)) | | `/flow-next:plan ` | Research the codebase, create epic with dependency-ordered tasks | | `/flow-next:work ` | Execute epic, task, or spec file, re-anchoring before each | -| `/flow-next:interview ` | Deep interview to flesh out a spec before planning | +| `/flow-next:interview ` | Deep interview to flesh out a spec before planning; doc-aware mode (autodetect + `--docs` / `--no-docs`) looks up canonical terms, surfaces conflicts to a `## Glossary Conflicts` spec section, prompts for decision records on load-bearing choices ([details](#flow-nextinterview)) | | `/flow-next:plan-review ` | Carmack-level plan review (RepoPrompt, Codex, or Copilot) | | `/flow-next:impl-review` | Carmack-level impl review of current branch | | `/flow-next:epic-review ` | Epic-completion review: verify implementation matches spec | @@ -1740,6 +1810,7 @@ Natural language also works: |---------|-----------------| | `/flow-next:prospect` | `[focus hint]` (positional) — concept / path / constraint / volume | | `/flow-next:capture` | `mode:autofix` (positional), `--rewrite `, `--from-compacted-ok`, `--yes` | +| `/flow-next:interview` | `--docs` / `--no-docs` (override doc-aware autodetect, v0.39.0+) | | 
`/flow-next:plan` | `--research=rp\|grep`, `--review=rp\|codex\|copilot\|export\|none`, `--no-review` | | `/flow-next:work` | `--branch=current\|new\|worktree`, `--review=rp\|codex\|copilot\|export\|none`, `--no-review` | | `/flow-next:plan-review` | `--review=rp\|codex\|copilot\|export` | @@ -1841,6 +1912,24 @@ Deep questioning (40+ questions) to surface requirements, edge cases, and decisi These three patterns are additive enhancements to **how** questions are asked, not what gets asked. Existing 40+ question coverage is unchanged. +**Doc-aware mode (0.39.0+):** + +Autodetects when `GLOSSARY.md` has at least one term (husks ignored — branches on `flowctl glossary list --json | jq '.total_terms > 0'`, NOT plain file existence) or `knowledge/decisions/` has at least one entry. Override via: + +| Flag | Description | +|------|-------------| +| `--docs` | Force doc-aware mode on (even if autodetect says off) | +| `--no-docs` | Force doc-aware mode off (skip glossary lookup + decision-record prompts) | + +Four behaviors when active: + +- **(a) Glossary lookup before terminology questions** — fetch nearest-ancestor canonical wording via `flowctl glossary read` before asking the user about terminology. If user wording diverges from canonical, surface the conflict in a new `## Glossary Conflicts` section in the refined spec — sits next to `## Resolved via Codebase` as the audit trail for canonical-vs-user wording resolutions. Resolution outcome (use-canonical / update-glossary / accept-divergence) is recorded inline. +- **(b) Inline glossary write on resolution** — when the user picks `update-glossary`, `flowctl glossary add` is invoked immediately, recording the new canonical term with the user's chosen definition. The added term flows into downstream tasks via `docs-gap-scout` on the next planning pass. 
+- **(c) Decision-record awareness** — when a load-bearing architectural choice is made during the interview, prompt the user (via `AskUserQuestion`) to write a `knowledge/decisions/` entry. Three-criteria gate: hard-to-reverse / surprising / load-bearing trade-off. Read-back loop before write so the user can correct trade-off framing. +- **(d) Code/spec contradiction surfaced** — when an interview answer conflicts with an active decision record, the contradiction is surfaced in the refined spec (under `## Glossary Conflicts` or a similarly-named section) rather than silently overwriting either side. The user picks: revise the spec, supersede the decision, or accept divergence with rationale. + +Both `NEW-IDEA` and `EXISTING-EPIC` interview templates emit the `## Glossary Conflicts` section when behavior (a) or (d) fires. + #### `/flow-next:plan-review` ``` @@ -1978,7 +2067,7 @@ Override via flags or `scripts/ralph/config.env`. ### Planning Phase -1. **Research (parallel subagents)**: `repo-scout` (or `context-scout` if rp-cli) + `practice-scout` + `docs-scout` + `github-scout` + `epic-scout` + `docs-gap-scout` +1. **Research (parallel subagents)**: `repo-scout` (or `context-scout` if rp-cli) + `practice-scout` + `docs-scout` + `github-scout` + `epic-scout` + `docs-gap-scout` (v0.39.0+: also reads `GLOSSARY.md` on the ancestor chain + `knowledge/decisions/` to surface canonical terminology + prior load-bearing choices in the planning context) 2. **Gap analysis**: `flow-gap-analyst` finds edge cases + missing requirements 3. **Epic creation**: Writes spec to `.flow/specs/fn-N.md`, sets epic dependencies from `epic-scout` findings 4. 
**Task breakdown**: Creates tasks + explicit dependencies in `.flow/tasks/`, adds doc update acceptance criteria from `docs-gap-scout` diff --git a/plugins/flow-next/agents/docs-gap-scout.md b/plugins/flow-next/agents/docs-gap-scout.md index b8c4bb23..c1fcd8db 100644 --- a/plugins/flow-next/agents/docs-gap-scout.md +++ b/plugins/flow-next/agents/docs-gap-scout.md @@ -40,8 +40,20 @@ ls -la adr/ adrs/ decisions/ architecture/ 2>/dev/null # Generated docs ls -la typedoc.json jsdoc.json mkdocs.yml 2>/dev/null + +# Project glossary (root + subdirs) — prefer flowctl when present +# Returns {groups: [{path, entries, count}], file_count, total_terms} +.flow/bin/flowctl glossary list --json 2>/dev/null \ + || find . -name GLOSSARY.md -not -path './node_modules/*' -not -path './.git/*' 2>/dev/null + +# Decision records (flow-next memory category) +ls -la .flow/memory/knowledge/decisions/ 2>/dev/null ``` +Notes on the glossary scan: +- Prefer `flowctl glossary list --json` — it walks ancestors and groups by file. Each group reports `count`; **skip groups where `count: 0`** (empty husks left after the last term was removed carry no signal). +- Fall back to raw `find` only when flowctl is unavailable. + ### 2. Categorize what exists Build a map: @@ -51,6 +63,8 @@ Build a map: - **Architecture**: ADRs, design docs - **Design system**: DESIGN.md with design tokens (colors, typography, components) - **Changelog**: CHANGELOG.md or similar +- **Glossary**: `GLOSSARY.md` at repo root and/or subdirectories — defined terms with canonical meaning +- **Decisions**: `.flow/memory/knowledge/decisions/` — recorded choices with `Consequences` sections ### 3. 
Match request to docs @@ -66,6 +80,8 @@ Based on the REQUEST, identify which docs likely need updates: | Architectural decision | ADR | | CLI change | README CLI section, --help text | | Design tokens/theming | DESIGN.md color, typography, component sections | +| Glossary term touched | When the planned diff modifies code that uses a term defined in any `GLOSSARY.md`, flag the glossary entry (file + term name) for review | +| Decision constraint | When the planned diff touches a file referenced in a decision entry's `Consequences` section, flag the decision entry (id + title) for review | ### 4. Check current doc state @@ -89,6 +105,8 @@ For identified docs, quick scan to understand structure: - **README.md**: Update usage section for new feature - **CHANGELOG.md**: Add entry under "Added" - **openapi.yaml**: Add new /auth endpoint spec +- **GLOSSARY.md** (root): Term `Session` touched — diff changes session-cookie semantics +- **`.flow/memory/knowledge/decisions/use-jwt-2026-04-12.md`**: Consequences reference auth middleware which this diff modifies ### No Updates Expected - DESIGN.md (no design token changes) diff --git a/plugins/flow-next/agents/memory-scout.md b/plugins/flow-next/agents/memory-scout.md index 3ea1fc42..9b846d99 100644 --- a/plugins/flow-next/agents/memory-scout.md +++ b/plugins/flow-next/agents/memory-scout.md @@ -19,7 +19,7 @@ You receive either: Entries live under `.flow/memory/` in a categorized tree (new schema, post fn-30): - `bug//-YYYY-MM-DD.md` — learnings from NEEDS_WORK reviews and runtime failures. Categories: `build-errors`, `test-failures`, `runtime-errors`, `performance`, `security`, `integration`, `data`, `ui`. -- `knowledge//-YYYY-MM-DD.md` — curated conventions, architecture patterns, tooling decisions. Categories: `architecture-patterns`, `conventions`, `tooling-decisions`, `workflow`, `best-practices`. +- `knowledge//-YYYY-MM-DD.md` — curated conventions, architecture patterns, tooling decisions. 
Categories: `architecture-patterns`, `conventions`, `tooling-decisions`, `workflow`, `best-practices`, `decisions`. Legacy flat files (pre-migration) may still exist: - `pitfalls.md` / `conventions.md` / `decisions.md` — readable via the same CLI (reported as `track: "legacy"`). diff --git a/plugins/flow-next/agents/plan-sync.md b/plugins/flow-next/agents/plan-sync.md index 2c9f70a5..4fb83174 100644 --- a/plugins/flow-next/agents/plan-sync.md +++ b/plugins/flow-next/agents/plan-sync.md @@ -17,6 +17,8 @@ You synchronize downstream task specs after implementation drift. - `DOWNSTREAM_TASK_IDS` - comma-separated list of remaining tasks - `DRY_RUN` - "true" or "false" (optional, defaults to false) - `CROSS_EPIC` - "true" or "false" (from config planSync.crossEpic, defaults to false) +- `GLOSSARY_JSON` - output of `flowctl glossary list --json` (optional; defaults to `{"groups":[],"file_count":0,"total_terms":0}` when the project has no glossary) +- `DECISIONS_JSON` - output of `flowctl memory list --track knowledge --category decisions --json` (optional; defaults to `{"entries":[],"count":0}` when no decision entries exist) ## Phase 1: Re-anchor on Completed Task @@ -66,6 +68,42 @@ Compare spec vs implementation: Drift exists if implementation differs from spec in ways that downstream tasks reference. +## Phase 3b: Glossary renames + decision overrides + +Two extra signal types layer on top of the variable/API drift in Phase 3. Both are sourced from the input prompt — no extra flowctl calls required. + +### 3b.1 — Glossary-term renames + +Skip this section when `GLOSSARY_JSON.file_count == 0` OR `GLOSSARY_JSON.total_terms == 0` (every group is a husk; no signal). Otherwise iterate `groups[].entries[]`: + +For each entry with at least one `avoid` alias: +1. Search the **completed task spec** and the **epic spec** for any `avoid` alias (case-insensitive, whole-word). 
Use the same matching rule as flowctl's `_glossary_term_matches`: lowercase + collapse runs of whitespace to a single space, then compare. The host agent's Grep tool with `-i` and `\b` anchors is equivalent. +2. Search the **actual code touched by the completed task** (files in `evidence.commits` from Phase 1) for the canonical `term`, including its identifier forms — whole-word matching alone misses them (e.g. "feedback loop" also appears as `feedback_loop`, `feedbackLoop`, `FeedbackLoop`). +3. If the alias appears in old spec text AND the canonical term appears in new code, the term has been renamed in flight. Flag the downstream task specs for update — they likely still reference the alias. + +Example: +- `GLOSSARY_JSON` entry: `{"term": "feedback loop", "avoid": ["polling cycle", "tick"]}` +- Old spec text: "...starts a new polling cycle..." +- New code (from completed task): `def run_feedback_loop(...)` +- Action: in Phase 5, update downstream specs that say "polling cycle" to say "feedback loop"; add a `<!-- Updated by plan-sync: ... -->` breadcrumb. + +When the canonical term appears in old spec text already, no rename — skip. + +### 3b.2 — Decision overrides + +Skip when `DECISIONS_JSON.count == 0`. Otherwise iterate `DECISIONS_JSON.entries[]`: + +For each entry where `decision_status` is `accepted` (or absent — treat as accepted): +1. Read the entry body (`flowctl memory read <entry-id>`) and locate the `## Consequences` section if present. +2. Extract any file paths, module names, or API names referenced under `Consequences`. The agent reads the prose directly — no regex extraction is required; the goal is to find concrete code references the decision committed to. +3. Cross-check against the actual code touched by the completed task (files from `evidence.commits`). If the completed task modifies a file the decision named, AND the change appears to contradict the decision's stated direction (e.g. decision says "we use REST" + new code adds a `/graphql` endpoint), surface the decision id in the report. + +**Do not auto-supersede.** Do not Edit the decision entry. Do not write a successor.
The agent's job here is only to surface the signal — list decision ids that need human review in the Phase 6 summary under a `Decision overrides flagged for review` heading. The user (or `/flow-next:audit`) decides whether to supersede. + +Skip entries with `decision_status: superseded` (historical record, not an active constraint) or `decision_status: proposed` (not yet committed to). + +When the `Consequences` section is missing or names no concrete code references, skip the entry — there's nothing to cross-check. + ## Phase 4: Check Downstream Tasks For each task in DOWNSTREAM_TASK_IDS: @@ -78,6 +116,7 @@ Look for references to: - Names/APIs from completed task spec (now stale) - Assumptions about data structures - Integration points that changed +- Glossary `_Avoid_` aliases flagged in Phase 3b.1 (downstream spec uses the alias; canonical term should land instead) Flag tasks that need updates. @@ -127,7 +166,9 @@ Changes should: - Update variable/function names to match actual - Correct API signatures - Fix data structure assumptions +- Replace glossary aliases (Phase 3b.1) with the canonical term; preserve surrounding prose - Add note: `<!-- Updated by plan-sync: fn-X.Y used <actual> not <planned> -->` +- For glossary renames, the breadcrumb names the alias and canonical term: `<!-- Updated by plan-sync: glossary alias "<alias>" → "<term>" -->` **DO NOT:** - Change task scope or requirements @@ -166,10 +207,14 @@ Drift detected: yes Would update (DRY RUN): - fn-1.3: Change references from `UserAuth.login()` to `authService.authenticate()` - fn-1.4: Update expected return type from `boolean` to `AuthResult` +- fn-1.5: Replace glossary alias "polling cycle" with canonical "feedback loop" Would update traceability: # Only if table exists - R2 (Session persistence): would add fn-1.4 coverage (API changed from fn-1.2) +Decision overrides flagged for review: # Only if DECISIONS_JSON had entries with overrides +- knowledge/decisions/use-rest-not-graphql-2026-03-12: completed task added `/graphql` endpoint in src/api/router.ts; review for supersession. + No files modified.
``` @@ -182,14 +227,20 @@ Drift detected: yes Updated tasks (same epic): - fn-1.3: Changed references from `UserAuth.login()` to `authService.authenticate()` - fn-1.4: Updated expected return type from `boolean` to `AuthResult` +- fn-1.5: Replaced glossary alias "polling cycle" with canonical "feedback loop" Updated tasks (cross-epic): # Only if CROSS_EPIC enabled and found - fn-3.2: Updated authService import path Updated traceability: # Only if table exists and rows affected - R2 (Session persistence): removed fn-1.2 coverage (API changed), now needs fn-1.4 + +Decision overrides flagged for review: # Only if DECISIONS_JSON had entries with overrides +- knowledge/decisions/use-rest-not-graphql-2026-03-12: completed task added `/graphql` endpoint in src/api/router.ts; review for supersession. ``` +**Decision overrides are surfaced, not auto-resolved.** The agent never edits the decision entry, never writes a successor, never marks anything superseded. The user (or `/flow-next:audit`) handles supersession. 
+ ## Rules - **Read-only exploration** - Use Grep/Glob/Read for codebase, never edit source @@ -197,6 +248,8 @@ Updated traceability: # Only if table exists and rows affected - **Preserve intent** - Update references, not requirements - **Minimal changes** - Only fix stale references, don't rewrite specs - **Skip if no drift** - Return quickly if implementation matches spec +- **Glossary entries are read-only** - never Edit `GLOSSARY.md` files; the agent only consumes the JSON +- **Decision entries are read-only** - never Edit `.flow/memory/knowledge/decisions/*.md`; surface overrides for human review ## R-ID preservation (MANDATORY) diff --git a/plugins/flow-next/codex/agents/docs-gap-scout.toml b/plugins/flow-next/codex/agents/docs-gap-scout.toml index 816dcd31..08f010e3 100644 --- a/plugins/flow-next/codex/agents/docs-gap-scout.toml +++ b/plugins/flow-next/codex/agents/docs-gap-scout.toml @@ -41,8 +41,20 @@ ls -la adr/ adrs/ decisions/ architecture/ 2>/dev/null # Generated docs ls -la typedoc.json jsdoc.json mkdocs.yml 2>/dev/null + +# Project glossary (root + subdirs) — prefer flowctl when present +# Returns {groups: [{path, entries, count}], file_count, total_terms} +.flow/bin/flowctl glossary list --json 2>/dev/null \\ + || find . -name GLOSSARY.md -not -path './node_modules/*' -not -path './.git/*' 2>/dev/null + +# Decision records (flow-next memory category) +ls -la .flow/memory/knowledge/decisions/ 2>/dev/null ``` +Notes on the glossary scan: +- Prefer `flowctl glossary list --json` — it walks ancestors and groups by file. Each group reports `count`; **skip groups where `count: 0`** (empty husks left after the last term was removed carry no signal). +- Fall back to raw `find` only when flowctl is unavailable. + ### 2. 
Categorize what exists Build a map: @@ -52,6 +64,8 @@ Build a map: - **Architecture**: ADRs, design docs - **Design system**: DESIGN.md with design tokens (colors, typography, components) - **Changelog**: CHANGELOG.md or similar +- **Glossary**: `GLOSSARY.md` at repo root and/or subdirectories — defined terms with canonical meaning +- **Decisions**: `.flow/memory/knowledge/decisions/` — recorded choices with `Consequences` sections ### 3. Match request to docs @@ -67,6 +81,8 @@ Based on the REQUEST, identify which docs likely need updates: | Architectural decision | ADR | | CLI change | README CLI section, --help text | | Design tokens/theming | DESIGN.md color, typography, component sections | +| Glossary term touched | When the planned diff modifies code that uses a term defined in any `GLOSSARY.md`, flag the glossary entry (file + term name) for review | +| Decision constraint | When the planned diff touches a file referenced in a decision entry's `Consequences` section, flag the decision entry (id + title) for review | ### 4. 
Check current doc state @@ -90,6 +106,8 @@ For identified docs, quick scan to understand structure: - **README.md**: Update usage section for new feature - **CHANGELOG.md**: Add entry under "Added" - **openapi.yaml**: Add new /auth endpoint spec +- **GLOSSARY.md** (root): Term `Session` touched — diff changes session-cookie semantics +- **`.flow/memory/knowledge/decisions/use-jwt-2026-04-12.md`**: Consequences reference auth middleware which this diff modifies ### No Updates Expected - DESIGN.md (no design token changes) diff --git a/plugins/flow-next/codex/agents/memory-scout.toml b/plugins/flow-next/codex/agents/memory-scout.toml index e15b928c..d6127940 100644 --- a/plugins/flow-next/codex/agents/memory-scout.toml +++ b/plugins/flow-next/codex/agents/memory-scout.toml @@ -19,7 +19,7 @@ You receive either: Entries live under `.flow/memory/` in a categorized tree (new schema, post fn-30): - `bug//-YYYY-MM-DD.md` — learnings from NEEDS_WORK reviews and runtime failures. Categories: `build-errors`, `test-failures`, `runtime-errors`, `performance`, `security`, `integration`, `data`, `ui`. -- `knowledge//-YYYY-MM-DD.md` — curated conventions, architecture patterns, tooling decisions. Categories: `architecture-patterns`, `conventions`, `tooling-decisions`, `workflow`, `best-practices`. +- `knowledge//-YYYY-MM-DD.md` — curated conventions, architecture patterns, tooling decisions. Categories: `architecture-patterns`, `conventions`, `tooling-decisions`, `workflow`, `best-practices`, `decisions`. Legacy flat files (pre-migration) may still exist: - `pitfalls.md` / `conventions.md` / `decisions.md` — readable via the same CLI (reported as `track: "legacy"`). 
diff --git a/plugins/flow-next/codex/agents/plan-sync.toml b/plugins/flow-next/codex/agents/plan-sync.toml index c628f986..ee33cab8 100644 --- a/plugins/flow-next/codex/agents/plan-sync.toml +++ b/plugins/flow-next/codex/agents/plan-sync.toml @@ -17,6 +17,8 @@ You synchronize downstream task specs after implementation drift. - `DOWNSTREAM_TASK_IDS` - comma-separated list of remaining tasks - `DRY_RUN` - "true" or "false" (optional, defaults to false) - `CROSS_EPIC` - "true" or "false" (from config planSync.crossEpic, defaults to false) +- `GLOSSARY_JSON` - output of `flowctl glossary list --json` (optional; defaults to `{"groups":[],"file_count":0,"total_terms":0}` when the project has no glossary) +- `DECISIONS_JSON` - output of `flowctl memory list --track knowledge --category decisions --json` (optional; defaults to `{"entries":[],"count":0}` when no decision entries exist) ## Phase 1: Re-anchor on Completed Task @@ -66,6 +68,42 @@ Compare spec vs implementation: Drift exists if implementation differs from spec in ways that downstream tasks reference. +## Phase 3b: Glossary renames + decision overrides + +Two extra signal types layer on top of the variable/API drift in Phase 3. Both are sourced from the input prompt; the only extra flowctl call is `flowctl memory read`, used in 3b.2 to load a decision entry's body. + +### 3b.1 — Glossary-term renames + +Skip this section when `GLOSSARY_JSON.file_count == 0` (no glossary files) OR `GLOSSARY_JSON.total_terms == 0` (every group is a husk) — either way there is no signal. Otherwise iterate `groups[].entries[]`: + +For each entry with at least one `avoid` alias: +1. Search the **completed task spec** and the **epic spec** for any `avoid` alias (case-insensitive, whole-word). Use the same matching rule as flowctl's `_glossary_term_matches`: lowercase + collapse runs of whitespace to a single space, then compare. The host agent's Grep tool with `-i` and `\\b` anchors is equivalent. +2. Search the **actual code touched by the completed task** (files in `evidence.commits` from Phase 1) for the canonical `term`, including its identifier forms — whole-word matching alone misses them (e.g. "feedback loop" also appears as `feedback_loop`, `feedbackLoop`, `FeedbackLoop`). +3.
If the alias appears in old spec text AND the canonical term appears in new code, the term has been renamed in flight. Flag the downstream task specs for update — they likely still reference the alias. + +Example: +- `GLOSSARY_JSON` entry: `{"term": "feedback loop", "avoid": ["polling cycle", "tick"]}` +- Old spec text: "...starts a new polling cycle..." +- New code (from completed task): `def run_feedback_loop(...)` +- Action: in Phase 5, update downstream specs that say "polling cycle" to say "feedback loop"; add a `<!-- Updated by plan-sync: ... -->` breadcrumb. + +When the canonical term appears in old spec text already, no rename — skip. + +### 3b.2 — Decision overrides + +Skip when `DECISIONS_JSON.count == 0`. Otherwise iterate `DECISIONS_JSON.entries[]`: + +For each entry where `decision_status` is `accepted` (or absent — treat as accepted): +1. Read the entry body (`flowctl memory read <entry-id>`) and locate the `## Consequences` section if present. +2. Extract any file paths, module names, or API names referenced under `Consequences`. The agent reads the prose directly — no regex extraction is required; the goal is to find concrete code references the decision committed to. +3. Cross-check against the actual code touched by the completed task (files from `evidence.commits`). If the completed task modifies a file the decision named, AND the change appears to contradict the decision's stated direction (e.g. decision says "we use REST" + new code adds a `/graphql` endpoint), surface the decision id in the report. + +**Do not auto-supersede.** Do not Edit the decision entry. Do not write a successor. The agent's job here is only to surface the signal — list decision ids that need human review in the Phase 6 summary under a `Decision overrides flagged for review` heading. The user (or `/flow-next:audit`) decides whether to supersede. + +Skip entries with `decision_status: superseded` (historical record, not an active constraint) or `decision_status: proposed` (not yet committed to).
+ +When the `Consequences` section is missing or names no concrete code references, skip the entry — there's nothing to cross-check. + ## Phase 4: Check Downstream Tasks For each task in DOWNSTREAM_TASK_IDS: @@ -78,6 +116,7 @@ Look for references to: - Names/APIs from completed task spec (now stale) - Assumptions about data structures - Integration points that changed +- Glossary `_Avoid_` aliases flagged in Phase 3b.1 (downstream spec uses the alias; canonical term should land instead) Flag tasks that need updates. @@ -127,7 +166,9 @@ Changes should: - Update variable/function names to match actual - Correct API signatures - Fix data structure assumptions +- Replace glossary aliases (Phase 3b.1) with the canonical term; preserve surrounding prose - Add note: `<!-- Updated by plan-sync: fn-X.Y used <actual> not <planned> -->` +- For glossary renames, the breadcrumb names the alias and canonical term: `<!-- Updated by plan-sync: glossary alias "<alias>" → "<term>" -->` **DO NOT:** - Change task scope or requirements @@ -166,10 +207,14 @@ Drift detected: yes Would update (DRY RUN): - fn-1.3: Change references from `UserAuth.login()` to `authService.authenticate()` - fn-1.4: Update expected return type from `boolean` to `AuthResult` +- fn-1.5: Replace glossary alias "polling cycle" with canonical "feedback loop" Would update traceability: # Only if table exists - R2 (Session persistence): would add fn-1.4 coverage (API changed from fn-1.2) +Decision overrides flagged for review: # Only if DECISIONS_JSON had entries with overrides +- knowledge/decisions/use-rest-not-graphql-2026-03-12: completed task added `/graphql` endpoint in src/api/router.ts; review for supersession. + No files modified.
``` @@ -182,14 +227,20 @@ Drift detected: yes Updated tasks (same epic): - fn-1.3: Changed references from `UserAuth.login()` to `authService.authenticate()` - fn-1.4: Updated expected return type from `boolean` to `AuthResult` +- fn-1.5: Replaced glossary alias "polling cycle" with canonical "feedback loop" Updated tasks (cross-epic): # Only if CROSS_EPIC enabled and found - fn-3.2: Updated authService import path Updated traceability: # Only if table exists and rows affected - R2 (Session persistence): removed fn-1.2 coverage (API changed), now needs fn-1.4 + +Decision overrides flagged for review: # Only if DECISIONS_JSON had entries with overrides +- knowledge/decisions/use-rest-not-graphql-2026-03-12: completed task added `/graphql` endpoint in src/api/router.ts; review for supersession. ``` +**Decision overrides are surfaced, not auto-resolved.** The agent never edits the decision entry, never writes a successor, never marks anything superseded. The user (or `/flow-next:audit`) handles supersession. + ## Rules - **Read-only exploration** - Use Grep/Glob/Read for codebase, never edit source @@ -197,6 +248,8 @@ Updated traceability: # Only if table exists and rows affected - **Preserve intent** - Update references, not requirements - **Minimal changes** - Only fix stale references, don't rewrite specs - **Skip if no drift** - Return quickly if implementation matches spec +- **Glossary entries are read-only** - never Edit `GLOSSARY.md` files; the agent only consumes the JSON +- **Decision entries are read-only** - never Edit `.flow/memory/knowledge/decisions/*.md`; surface overrides for human review ## R-ID preservation (MANDATORY) diff --git a/plugins/flow-next/codex/skills/flow-next-audit/SKILL.md b/plugins/flow-next/codex/skills/flow-next-audit/SKILL.md index a2ebfbb3..585d1e78 100644 --- a/plugins/flow-next/codex/skills/flow-next-audit/SKILL.md +++ b/plugins/flow-next/codex/skills/flow-next-audit/SKILL.md @@ -11,6 +11,8 @@ Memory entries decay. 
A `.flow/memory/bug/runtime-errors/` entry logged six mont This skill IS the audit. The host agent (Claude Code / Codex / Droid) walks `.flow/memory/`, reads each entry, uses Read/Grep/Glob/git to verify references against the current codebase, applies engineering judgment, and decides per entry whether to **Keep / Update / Consolidate / Replace / Delete**. Optional autofix mode applies unambiguous actions and marks ambiguous as stale. +Decision entries (`.flow/memory/knowledge/decisions/`) and glossary terms (`GLOSSARY.md` files at the repo root and on the ancestor chain) are walked alongside the rest of memory. Decisions get a calibrated judging question — "does the constraint that motivated this choice still hold?" — and Replace becomes a two-step supersession (write successor, mark old `decision_status: superseded`, never `git rm`). Glossary terms are scanned for code usage; zero-hit terms get a `` HTML comment via Edit tool (no `flowctl glossary mark-stale` exists), `_Avoid_` aliases appearing in code surface as alias-creep findings. + There is no Python audit-engine, no codex/copilot subprocess dispatch, no deterministic scorer. The host agent is already an LLM and does the work directly. flowctl provides only thin persistence plumbing (`memory mark-stale`, `memory mark-fresh`, `memory search --status`) — landed by Task 2 of this epic. **Read [workflow.md](workflow.md) for the full phase-by-phase execution. Read [phases.md](phases.md) for the 5-outcomes lookup with memory-schema-specific calibration.** @@ -73,7 +75,10 @@ The goal is automated maintenance with human oversight on judgment calls — not - **Auditing legacy flat files** (`.flow/memory/pitfalls.md`, `conventions.md`, `decisions.md` at the memory root). Skip with a warning that recommends `/flow-next:memory-migrate` first. Report includes the skipped count. - **Auditing under `_audit/`, `_review/`, or any other `_*` directory** under `.flow/memory/`. 
- **Deleting silently.** Delete is reserved for unambiguous cases (code gone AND problem domain gone). Default to Replace or Consolidate when there's still value to preserve. -- **Inventing flowctl subcommands** beyond what Task 2 ships (`memory mark-stale`, `memory mark-fresh`, `memory search --status`). Use Write tool + git for moves and deletes. +- **`git rm` on superseded decision entries.** Decision history stays on disk. Replace for `knowledge/decisions/` entries means write a new entry and mark the old `decision_status: superseded` with `superseded_by: ` — never delete the old file. +- **Deleting glossary terms.** When a term has zero code hits, mark stale via Edit-tool HTML comment. Removing the term entry is the operator's call, surfaced in the report. +- **Inventing flowctl subcommands** beyond what fn-34 task 2 ships (`memory mark-stale`, `memory mark-fresh`, `memory search --status`). fn-38 task 2 ships only `glossary {add,list,read,remove}` — there is no `flowctl glossary mark-stale`; use Edit tool. Use Write tool + git for moves and deletes. +- **Mass-renaming code from a glossary alias-creep finding.** The audit reports file:line locations and stops there; code rename is the operator's call. - **Auto-committing without user awareness in interactive mode.** Phase 5 detects git context and asks. Autofix uses sensible defaults. - **Setting `context: fork`** — blocking-question tools must stay reachable. - **Running parallel replacement subagents.** Investigation subagents can run in parallel for 3+ independent entries; replacement subagents run sequentially to protect orchestrator context. @@ -98,13 +103,14 @@ fi Execute the phases in [workflow.md](workflow.md) in order: -0. **Discover & Triage** — walk `.flow/memory/{bug,knowledge}//`, group by module / category, count, choose interaction path (focused / batch / broad), skip legacy + `_*` directories with a counted warning. -1. 
**Investigate** — per entry: read frontmatter + body, verify referenced files / symbols / modules against current code via Read / Grep / Glob, check git log in the area, form Keep / Update / Consolidate / Replace / Delete recommendation with 2-4 evidence bullets and confidence. For 3+ independent entries, dispatch parallel investigation subagents (read-only). +0. **Discover & Triage** — walk `.flow/memory/{bug,knowledge}//`, group by module / category, count, choose interaction path (focused / batch / broad), skip legacy + `_*` directories with a counted warning. `knowledge/decisions/` entries are picked up automatically by the same glob. +0.5 **Glossary scan** — enumerate `GLOSSARY.md` files via `flowctl glossary list --json`; per term, grep tracked code for the term and each `_Avoid_` alias (case-insensitive whole-word, normalized whitespace); zero hits + zero alias hits → mark stale via Edit tool (HTML comment after the term heading); alias hits → surface as alias-creep finding for Phase 3 (interactive) or report (autofix); skip husk files (`count: 0`) with a single advisory. +1. **Investigate** — per entry: read frontmatter + body, verify referenced files / symbols / modules against current code via Read / Grep / Glob, check git log in the area, form Keep / Update / Consolidate / Replace / Delete recommendation with 2-4 evidence bullets and confidence. For 3+ independent entries, dispatch parallel investigation subagents (read-only). Decision entries use the calibrated judging question — "does the constraint still hold?" — see [phases.md](phases.md) §Decision-entry calibration. 1.75 **Cross-doc analysis** — compare entries sharing module / category for overlap (problem, solution, root cause, files), supersession (newer canonical entry covers older narrower precursor), contradictions. -2. **Classify** — apply [phases.md](phases.md) decision criteria. For Replace, verify evidence is sufficient to write a trustworthy successor; mark stale otherwise. -3. 
**Ask** — interactive only; autofix skips. Group obvious Keeps + Updates → confirm batch. Present Consolidate / Replace / non-auto-Delete individually. Lead with recommendation. One question at a time. -4. **Execute** — Keep: no edit. Update: agent edits frontmatter / body via Write tool, preserving unknown fields. Consolidate: merge unique content into canonical, `git rm` subsumed. Replace: write new entry, `git rm` old. Delete: `git rm` (only when code AND problem domain both gone). Ambiguous in autofix: `flowctl memory mark-stale`. -5. **Report + Commit** — print Kept / Updated / Consolidated / Replaced / Deleted / Marked-stale / Skipped counts plus per-entry detail. Detect git context (current branch, dirty tree). Interactive: ask commit options. Autofix: branch-and-PR on main, commit on feature branch, stage only audit-modified files. +2. **Classify** — apply [phases.md](phases.md) decision criteria. For Replace, verify evidence is sufficient to write a trustworthy successor; mark stale otherwise. For decision entries, Replace = supersede (write new entry; mark old `decision_status: superseded`, `superseded_by: `; never `git rm` the old). +3. **Ask** — interactive only; autofix skips. Group obvious Keeps + Updates → confirm batch. Present Consolidate / Replace / non-auto-Delete individually. Surface glossary alias-creep findings per alias. Lead with recommendation. One question at a time. +4. **Execute** — Keep: no edit. Update: agent edits frontmatter / body via Write tool, preserving unknown fields. Consolidate: merge unique content into canonical, `git rm` subsumed. Replace: write new entry, `git rm` old (decisions: write new + edit old's frontmatter to mark superseded, never `git rm`). Delete: `git rm` (only when code AND problem domain both gone). Glossary stale: Edit comment after term heading. Ambiguous in autofix: `flowctl memory mark-stale`. +5. 
**Report + Commit** — print Kept / Updated / Consolidated / Replaced / Deleted / Marked-stale / Skipped counts plus per-entry detail and a Glossary section (Kept / Marked stale / Alias-creep / Husks). Detect git context (current branch, dirty tree). Interactive: ask commit options. Autofix: branch-and-PR on main, commit on feature branch, stage only audit-modified files. 6. **Discoverability check** — verify the substantive CLAUDE.md / AGENTS.md (the one not just `@`-including the other) mentions `.flow/memory/` with schema basics (track / category / module / tags / status) and when to consult. Add a minimal line if missing — interactive asks consent, autofix surfaces as recommendation. ## Output rules @@ -125,8 +131,16 @@ Consolidated: C Replaced: Z Deleted: W Marked stale: S + +Glossary +-------- +Files scanned: F (H husks) +Terms scanned: T +Kept: K_g +Marked stale: S_g +Alias-creep flagged: A_g ``` -Then per-entry detail (id, classification, evidence, action taken). For Consolidate: which entry was canonical, what unique content was merged, what was deleted. For Replace: what the old entry recommended vs what current code does, path to successor. For Marked stale: why ambiguous. +Then per-entry detail (id, classification, evidence, action taken). For Consolidate: which entry was canonical, what unique content was merged, what was deleted. For Replace: what the old entry recommended vs what current code does, path to successor (decision Replace also notes the old entry now carries `decision_status: superseded`). For Marked stale: why ambiguous. For glossary terms: only stale + alias-creep cases get per-term lines (Keep is silent); husks get a one-line advisory each. Autofix mode splits actions into **Applied** (writes succeeded) and **Recommended** (writes failed — e.g. permission denied). The structure is the same; only the bucket differs. 
diff --git a/plugins/flow-next/codex/skills/flow-next-audit/phases.md b/plugins/flow-next/codex/skills/flow-next-audit/phases.md index e8151d0e..7355710e 100644 --- a/plugins/flow-next/codex/skills/flow-next-audit/phases.md +++ b/plugins/flow-next/codex/skills/flow-next-audit/phases.md @@ -12,6 +12,8 @@ For each entry, classify into exactly one outcome. Calibration below is specific For **autofix mode** ambiguity: mark as stale via `flowctl memory mark-stale` instead of guessing. +The 5 outcomes apply to every categorized entry, including the `knowledge/decisions/` category (fn-38 schema extension). Decision entries reuse the same classifier with a tighter judging question and a different shape for `Replace` — see the [Decision-entry calibration](#decision-entry-calibration) section below. + --- ## Keep @@ -227,6 +229,81 @@ That's it. No archive directory, no metadata flag. Git history preserves the fil --- +## Decision-entry calibration + +Entries under `knowledge/decisions/` (fn-38 schema) document forward-looking choices: the project picked approach X, considered Y and Z, and committed to a constraint. The 5 outcomes still apply, but the per-entry judging question changes — and `Replace` means **supersede**, not rewrite-in-place. + +### Per-entry judging question + +For non-decision entries, Phase 1 asks "is this still relevant?". For decision entries, ask: + +> **Does the constraint that motivated this decision still hold?** + +The constraint is whatever made the decision hard-to-reverse, surprising-without-context, and a real trade-off when it was made. If the constraint is still in force, the decision is still active. If the constraint has dissolved (the trade-off no longer exists, the surprising context is now the obvious default, the codebase changed shape so reversal is now cheap), the decision is a candidate for supersession. 
+ +### Decision-specific frontmatter + +Decision entries may carry these optional fields (see `MEMORY_DECISION_FIELDS` in `flowctl.py`): + +- `decision_status`: one of `proposed`, `accepted`, `superseded` (`MEMORY_DECISION_STATUSES`) +- `superseded_by`: id of the successor entry that replaced this one +- `alternatives_considered`: list of options that were rejected when the decision was made + +When auditing, treat `decision_status: superseded` as already-handled — the entry is historical record. Audit the `superseded_by` target instead. If `superseded_by` points at a missing entry, that's an Update (broken cross-reference) on this entry. + +### Outcome calibration for decisions + +| Outcome | Meaning for a decision entry | Action | +|---------|------------------------------|--------| +| **Keep** | Constraint still holds; rejected alternatives are still rejected for the same reasons | No edit | +| **Update** | Constraint holds; only references / `alternatives_considered` text / cross-refs drifted | Edit in place; `decision_status` unchanged | +| **Consolidate** | Two decision entries cover the same choice (rare — usually means a rushed double-write) | Merge into canonical, `git rm` subsumed | +| **Replace** | Constraint no longer holds; a different choice is now in force | **Supersede** — see flow below | +| **Delete** | The entire problem area is gone (the system that needed the decision was removed) | `git rm` (prefer Replace + supersede when problem domain still exists) | + +### Replace = supersede + +For non-decision entries, `Replace` means write a successor and `git rm` the old. For decision entries, the old entry stays — it's part of the history of why the project arrived where it is. Replace becomes a two-step supersession: + +1. **Write the new decision entry** — a fresh `knowledge/decisions/-.md` describing the current choice, what changed in the constraint, and why the prior decision no longer applies. 
Optionally include `alternatives_considered` listing both the original alternatives and the prior decision itself (now also rejected). Include `related_to: []` for traceability. +2. **Mark the old entry superseded** — Edit the old entry's frontmatter to set `decision_status: superseded` and `superseded_by: `. Body untouched. Do **not** `git rm` — the historical record stays on disk. + +When autofix evidence is insufficient to write the successor decision (the constraint clearly dissolved but the new approach is too unstable to commit to), mark the old entry stale via `flowctl memory mark-stale` instead of half-shipping a supersession. The user (or a follow-up audit) can revisit when the new approach has settled. + +### Edge cases + +- A decision whose `decision_status` is `proposed` but never reached `accepted` (the project never committed) → if no code reflects the proposal, classify Delete; if partial implementation exists, mark stale and surface in the report. +- A decision that references a constraint visible only in external context (a contract, a partner integration, a regulatory rule) → audit cannot verify the constraint from code alone. Skip with a "cannot mechanically verify" note in the report; do not auto-Delete. +- A decision pointing at `superseded_by: ` where the successor itself is now superseded → walk the chain; the audit target is the head of the chain. + +--- + +## Glossary scan (parallel to memory audit) + +Glossary terms are not memory entries — they live in `GLOSSARY.md` files at the repo root and (optionally) under subdirectories. The audit walks them in [Phase 0.5](workflow.md) of the workflow. 
The 5-outcomes table doesn't apply directly; the per-term decisions are simpler: + +| Outcome | Meaning for a glossary term | Action | +|---------|-----------------------------|--------| +| **Keep** | Term has hits in tracked code (case-insensitive whole-word match) | No edit | +| **Mark stale** | Zero hits for the term AND zero hits for any `_Avoid_` alias | Edit tool: append `` HTML comment after the term heading | +| **Alias-creep** | An `_Avoid_` alias has hits in code | Phase 3 question (interactive) or stale-flag note (autofix) — propose renaming code uses to the canonical term, or moving the alias out of `_Avoid_` | + +There is no `flowctl glossary mark-stale` subcommand. Stale-marking is an Edit-tool operation only. The agent must **never delete** the term entry on stale-detection — deletion is the operator's call, surfaced as a recommendation in the report. + +### Husk awareness + +A glossary file with `count: 0` from `flowctl glossary list --json` is a husk — `# Glossary` H1 with no terms after the last term was removed. Husks have no terms to audit; skip the walk for that file and surface a single advisory in Phase 5: + +``` +GLOSSARY.md at is an empty husk (no terms defined). +Remove the file manually if it's no longer needed; flow-next keeps it as +project state per fn-38 R18. +``` + +The audit never deletes the file. Removing it is a project decision, not a memory-audit decision. + +--- + ## Mark stale (autofix ambiguous + Replace-insufficient) **Not** one of the 5 outcomes — it's the autofix-mode escape hatch and the Replace-insufficient-evidence fallback. Surface in the report under "Marked stale" with the reason. @@ -259,6 +336,12 @@ Re-mark-stale on an already-stale entry updates `last_audited` + `audit_notes`. ## Decision tree (quick reference) ``` +Is the entry under knowledge/decisions/? 
+ yes → use the Decision-entry calibration block above + (judging question = "does the constraint still hold?"; + Replace = supersede, not git rm) + no → continue with the standard tree below + Is the entry's referenced code AND problem domain both gone? yes → Delete (auto-applicable when ALL auto-Delete criteria hold) no → continue @@ -279,3 +362,5 @@ Are there reference drifts (paths, modules, links, snippets)? ``` In autofix mode, replace any "ask user" branch with mark-stale. + +For glossary terms (separate from memory entries — see [Glossary scan](#glossary-scan-parallel-to-memory-audit) above): the tree is `code-hit? → Keep`; `no code-hit AND no alias-hit? → mark stale via Edit tool`; `alias hit in code? → Phase 3 question (interactive) or stale-flag note (autofix)`. diff --git a/plugins/flow-next/codex/skills/flow-next-audit/workflow.md b/plugins/flow-next/codex/skills/flow-next-audit/workflow.md index c3690a67..d1d1c32f 100644 --- a/plugins/flow-next/codex/skills/flow-next-audit/workflow.md +++ b/plugins/flow-next/codex/skills/flow-next-audit/workflow.md @@ -42,6 +42,8 @@ For each kept path, read the frontmatter (parser pattern from `prospect/workflow If the entry's `status` is `stale` already, surface it in the report under "Already stale" and skip investigation in autofix mode (mark-stale is idempotent — re-marking adds noise). In interactive mode, offer to refresh-investigate (rare path; user-driven). +**Decisions are auto-walked.** `MEMORY_CATEGORIES["knowledge"]` includes `decisions` (fn-38 schema extension), so the glob in §0.1 picks up `.flow/memory/knowledge/decisions/*.md` automatically — no separate phase. Decision entries get a calibrated judging question and a different `Replace` shape; see [phases.md](phases.md) §Decision-entry calibration. 
Decision-specific frontmatter (`decision_status`, `superseded_by`, `alternatives_considered`) is captured into the entry record for Phase 1 to use; entries with `decision_status: superseded` are surfaced as historical record and skipped (the audit target is the successor, not the superseded entry). + ### 0.2 — Detect legacy flat files ```bash @@ -129,6 +131,155 @@ Options: --- +## Phase 0.5: Glossary scan + +**Goal:** for every glossary file on the ancestor chain, verify each term has at least one usage in tracked code (term itself or any `_Avoid_` alias). Mark stale on absence; surface alias-creep as a Phase 3 signal. + +This phase is conceptually parallel to the memory walk — same audit invocation, separate scope. Glossary files are project state (not flow-next bookkeeping; see fn-38 R18). Skip the phase entirely when `flowctl glossary list --json` reports zero files. + +### 0.5.1 — Enumerate glossaries + +Use the flowctl helper as the single source of truth: + +```bash +GLOSSARY_JSON="$("$FLOWCTL" glossary list --json 2>/dev/null || echo '{"groups":[],"file_count":0,"total_terms":0}')" +``` + +JSON shape (fn-38 task 2): + +```json +{ + "groups": [ + { + "path": "/abs/path/GLOSSARY.md", + "entries": [ + { + "term": "", + "definition": "", + "avoid": ["", ""], + "relates_to": [""] + } + ], + "count": 1 + } + ], + "file_count": 1, + "total_terms": 1 +} +``` + +When `file_count == 0`, skip Phase 0.5 entirely. When `total_terms == 0` but `file_count > 0`, every group is a husk (see §0.5.4). + +### 0.5.2 — Per-term code search + +For each `(group, entry)` where `count > 0`: + +1. **Build the search corpus** — tracked source files only.
Use `git ls-files` to honor `.gitignore`; exclude `.flow/`, the glossary file itself, and known build artifacts: + + ```bash + git -C "$REPO_ROOT" ls-files -z \ + | grep -zvE '^\.flow/|/GLOSSARY\.md$|^GLOSSARY\.md$|/node_modules/|/\.git/' \ + > /tmp/glossary-corpus.zlist + ``` + + On platforms where Bash file ops gate behind permissions, the host agent should fall back to Glob with the equivalent exclusion pattern. + +2. **Search for the term** — case-insensitive, whole-word match (matches T2's `_glossary_term_matches` invariant). Normalize whitespace in the term first (collapse runs of whitespace to a single space), then anchor with `\b`: + + ```bash + TERM_NORM="$(printf '%s' "$term" | tr -s '[:space:]' ' ')" + TERM_HITS=$(xargs -0 grep -liEw -- "$(printf '%s' "$TERM_NORM" | sed 's/[][\.*^$\/]/\\&/g')" \ + < /tmp/glossary-corpus.zlist 2>/dev/null | wc -l | tr -d ' ') + ``` + + The agent may also use the Grep tool directly with an equivalent pattern; either path is fine. + +3. **Search for each `_Avoid_` alias** — same matching rule. Aggregate alias hits per-alias so the report can name the offending alias. + +4. **Decide:** + + | Term hits | Any alias hits | Outcome | + |-----------|----------------|---------| + | ≥1 | (n/a) | **Keep** — record reviewed-without-change | + | 0 | 0 | **Mark stale** — Edit tool, append HTML comment after the term heading | + | 0 | ≥1 | **Mark stale + alias-creep flag** — same Edit, plus surface to Phase 3 (interactive) or report (autofix) | + | ≥1 | ≥1 | **Alias-creep flag only** — term is alive but an alias is being used in code; do not mark stale | + +### 0.5.3 — Stale-marking via Edit tool + +There is no `flowctl glossary mark-stale` subcommand. fn-38 task 2 shipped only `add / list / read / remove`; stale-marking is an Edit-tool operation on the glossary file directly. + +The Edit appends an HTML comment immediately after the term heading line (preserves the body untouched, never deletes the entry). 
The comment lives between the heading and the definition paragraph so a casual reader sees it and `flowctl glossary list` still parses cleanly: + +```text +## + + + + + +_Avoid_: alias-1, alias-2 +``` + +Idempotency: when the heading already has a `` comment immediately following, replace the comment in place rather than stacking. Use `Edit` with `old_string` matching the existing comment line. + +**The agent must not delete the term entry on stale-detection.** Deletion is the operator's call. The audit surfaces it as a Phase 5 recommendation: + +``` +Recommended manual review: GLOSSARY.md term "" has no code hits. +Stale comment added; consider `flowctl glossary remove ` if the concept is gone. +``` + +### 0.5.4 — Husk awareness + +A glossary file with `count: 0` (the file is `# Glossary` H1 followed by no term entries — left intact after the last term was removed; see fn-38 task 2 R18) skips the per-term walk. Surface a single Phase 5 advisory per husk: + +``` +GLOSSARY.md at is an empty husk (no terms defined). +flow-next keeps it as project state per fn-38 R18 — remove it manually if no +longer needed. +``` + +The audit never deletes the file. + +### 0.5.5 — Alias-creep handling + +When a term has alias hits in code (whether or not the canonical term also has hits): + +- **Interactive (Phase 3):** present per alias as a question. Lead with the recommendation: + + ``` + Glossary term: "" (defined in ) + _Avoid_ alias "" appears in tracked code at (and N other locations). + + Options: + 1. Rename the code uses to "" (recommended) + 2. Drop "" from the _Avoid_ list (alias is now acceptable) + 3. Skip — surface in report only + ``` + + Option 1 is a code-edit recommendation only — the audit reports the locations; the operator handles the rename. (Mass-renaming code from a memory audit is out of scope.) + Option 2 is an Edit on the glossary file: remove the alias from the `_Avoid_` list while preserving the rest of the entry. 
+ +- **Autofix:** never auto-rename code. Surface the alias-creep finding in the report under "Recommended" with file:line locations. The agent does not Edit the glossary unless the term itself is also stale (in which case the stale comment captures the alias-creep too). + +### 0.5.6 — Carry into Phase 5 report + +Capture the per-term outcomes into a glossary section of the report (see §5.1 below). Counts: + +- `glossary_kept` — terms with code hits. +- `glossary_marked_stale` — terms with zero hits for the canonical term (with or without alias hits, per the §0.5.2 decision table), stale comment applied. +- `glossary_alias_creep` — terms whose `_Avoid_` aliases hit code (regardless of canonical hit count). +- `glossary_husks` — files with `count: 0`. + +### Done when + +- Every glossary group with `count > 0` has every term decided (Keep / mark stale / alias-creep). +- Every husk file has a queued advisory. +- The orchestrator has a glossary-side decision map alongside the memory-side investigation map. + +--- + ## Phase 1: Investigate (per entry) **Goal:** for each entry in scope, verify its claims against the current codebase and form a recommendation with evidence. @@ -417,6 +568,25 @@ When evidence is insufficient: 2. Report what evidence was found and what's missing. 3. Recommend the user run a domain-specific solve afterwards to capture fresh context. +**Replace flow for `knowledge/decisions/` entries** — the old entry is **not** `git rm`'d. Decision history stays on disk. Two-step supersession: + +1. Subagent (or orchestrator on the main thread for short successors) writes the new decision entry under `.flow/memory/knowledge/decisions/-.md`. Include `related_to: []` and, when known, `alternatives_considered` listing both the original alternatives and the prior decision (now also rejected). +2. Orchestrator edits the old entry's frontmatter via Write tool: set `decision_status: superseded` and `superseded_by: `. Body untouched. Other frontmatter fields preserved (round-trip rules from §4.2 apply).
+ +Insufficient evidence on a decision Replace routes to mark-stale on the old entry — same path as non-decision Replace, but the operator's follow-up is "draft the new decision when the constraint settles" rather than "research the new code shape." + +### 4.4.1 — Glossary stale-marking (Phase 0.5 outcomes) + +For each glossary term flagged "Mark stale" in Phase 0.5, the orchestrator applies the Edit on the main thread (no subagent — short, focused edits): + +1. Open the glossary file via Read. +2. Edit the line immediately after the `## ` heading. If a `` comment already exists there, replace it (idempotent re-mark). Otherwise insert it as a new line above the definition paragraph. +3. The comment text is ``. + +Glossary edits stage in the same git context as memory edits (Phase 5 picks the commit strategy uniformly across both). + +For alias-creep findings without a stale-flag (term has hits, but `_Avoid_` alias also has hits), the orchestrator does **not** edit the glossary in autofix mode. Interactive mode may edit only if the user picks "Drop the alias from `_Avoid_`" in Phase 3. Code renames are out of scope — the audit reports file:line locations and stops there. + ### 4.5 — Delete flow ```bash @@ -466,6 +636,14 @@ Replaced: Deleted: Marked stale: Skipped (no decision): + +Glossary +-------- +Files scanned: ( husks) +Terms scanned: +Kept: +Marked stale: +Alias-creep flagged: ``` Then per-entry detail (one block each): @@ -479,10 +657,23 @@ Then per-entry detail (one block each): Action: [Consolidate only] Canonical: ; merged: []; deleted: [] [Replace only] Old guidance: ; New entry: + [Decision Replace] Successor: ; old marked decision_status=superseded (NOT git-rm'd) ``` For **Keep** outcomes, group under a "Reviewed without edits" subsection so the result is visible without git churn. 
+Then per-glossary-term detail (only for stale + alias-creep cases — Keep is silent): + +``` +- : + Outcome: + Term hits: + Alias hits: : , : + Action: +``` + +Husk advisories (one per file with `count: 0`) follow under a "Glossary husks" subsection. + ### 5.2 — Autofix two-section split In autofix mode, split actions into: @@ -686,14 +877,15 @@ If step 6.4 produced an instruction-file edit AND Phase 5 already committed audi The skill itself is markdown — there's no unit-test surface. The validation is invoking `/flow-next:audit` in a real session. Expected behavior: -- Phase 0 walks `.flow/memory/`, lists per-cluster counts, reports legacy skip count if `pitfalls.md` etc. exist. +- Phase 0 walks `.flow/memory/`, lists per-cluster counts, reports legacy skip count if `pitfalls.md` etc. exist. Decision entries (`knowledge/decisions/`) are picked up automatically once the schema extension lands (fn-38 task 1). +- Phase 0.5 walks every `GLOSSARY.md` on the ancestor chain via `flowctl glossary list --json`, greps tracked code per-term + per-`_Avoid_` alias, marks zero-hit terms stale via Edit tool with ``, surfaces alias-creep, advises on husks. - Phase 1 produces evidence per entry. For 3+ entries, parallel investigation subagents run. -- Phase 2 classifies; Replace candidates with insufficient evidence reclassify as mark-stale. -- Phase 3 (interactive) groups Keeps / Updates for batched confirmation; presents Consolidate / Replace / Delete individually via blocking-question tool. -- Phase 4 executes via Write / `flowctl memory mark-stale` / `git rm`. -- Phase 5 prints the report; offers commit options based on git context. +- Phase 2 classifies; Replace candidates with insufficient evidence reclassify as mark-stale. Decision entries use the calibrated judging question and the supersede shape for Replace. 
+- Phase 3 (interactive) groups Keeps / Updates for batched confirmation; presents Consolidate / Replace / Delete and glossary alias-creep individually via blocking-question tool. +- Phase 4 executes via Write / `flowctl memory mark-stale` / `git rm`. Decision Replace = supersede (write new + edit old's `decision_status` + `superseded_by`; never `git rm`). Glossary stale = Edit comment after term heading. +- Phase 5 prints the report (memory section + glossary section + husk advisories); offers commit options based on git context. - Phase 6 checks CLAUDE.md / AGENTS.md for `.flow/memory/` mention; offers minimal addition if missing. -In autofix mode (`/flow-next:audit mode:autofix`), Phase 3 is skipped, ambiguous entries are marked stale, and the report is the sole deliverable. +In autofix mode (`/flow-next:audit mode:autofix`), Phase 3 is skipped, ambiguous entries are marked stale, glossary alias-creep surfaces as a recommendation only, and the report is the sole deliverable. -If Phase 0 produces nothing (no categorized entries, only legacy), the skill exits cleanly with the legacy-skip count. +If Phase 0 produces nothing (no categorized entries, only legacy) AND Phase 0.5 produces nothing (no glossary files), the skill exits cleanly with the legacy-skip count. diff --git a/plugins/flow-next/codex/skills/flow-next-interview/SKILL.md b/plugins/flow-next/codex/skills/flow-next-interview/SKILL.md index 2862b61c..60a97ae2 100644 --- a/plugins/flow-next/codex/skills/flow-next-interview/SKILL.md +++ b/plugins/flow-next/codex/skills/flow-next-interview/SKILL.md @@ -59,6 +59,53 @@ If empty, ask: "What should I interview you about? 
Give me a Flow ID (e.g., fn-1 FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT:-$HOME/.codex}}/scripts/flowctl" ``` +### Parse `--docs` / `--no-docs` flags + +Strip `--docs` / `--no-docs` from `$ARGUMENTS` before input-type detection so they don't get confused for a Flow ID or path: + +```bash +RAW_ARGS="$ARGUMENTS" +DOC_AWARE_FORCE="" # "" = autodetect, "on" = forced on, "off" = forced off +if [[ "$RAW_ARGS" == *"--no-docs"* ]]; then + DOC_AWARE_FORCE="off" + RAW_ARGS="${RAW_ARGS//--no-docs/}" +elif [[ "$RAW_ARGS" == *"--docs"* ]]; then + DOC_AWARE_FORCE="on" + RAW_ARGS="${RAW_ARGS//--docs/}" +fi +RAW_ARGS=$(printf "%s" "$RAW_ARGS" | tr -s ' ' | sed 's/^ //;s/ $//') +# RAW_ARGS now contains the Flow ID / file path / empty. +``` + +`--docs` and `--no-docs` are mutually exclusive; if the user passes both, `--no-docs` wins (the `if/elif` checks `--no-docs` first). In that case only `--no-docs` is stripped: the `elif` branch never runs, so the `--docs` token is left in the residual `RAW_ARGS` and surfaces downstream as an unrecognized argument — loud failure beats silent acceptance of conflicting state. + +### Doc-aware autodetect + +Decide whether doc-aware mode (behaviors a-d below) activates. Three paths: + +1. **Forced on** (`--docs` flag): `DOC_AWARE=1`. Lazy-creates root `GLOSSARY.md` on first term resolution via `flowctl glossary add` (writes to nearest-ancestor or repo root when no ancestor exists). +2. **Forced off** (`--no-docs` flag): `DOC_AWARE=0`. Skip behaviors a-d entirely, even if artifacts exist. +3. **Autodetect** (no flag): activate when `GLOSSARY.md` has at least one defined term OR any decision entry exists.
+ +```bash +DOC_AWARE=0 +if [[ "$DOC_AWARE_FORCE" == "on" ]]; then + DOC_AWARE=1 +elif [[ "$DOC_AWARE_FORCE" == "off" ]]; then + DOC_AWARE=0 +else + TERMS=$("$FLOWCTL" glossary list --json 2>/dev/null | jq -r '.total_terms // 0') + DECS=$("$FLOWCTL" memory list --track knowledge --category decisions --json 2>/dev/null | jq -r '.entries | length // 0') + if [[ "${TERMS:-0}" -gt 0 || "${DECS:-0}" -gt 0 ]]; then + DOC_AWARE=1 + fi +fi +``` + +**Why `total_terms > 0` rather than `[[ -f GLOSSARY.md ]]`:** `flowctl glossary remove` leaves a `# Glossary` H1 husk on disk after the last term is removed (the file is project state, intentionally retained). A presence-only check would false-positive on an empty husk and surface phantom doc-aware questions when no canonical vocabulary is actually defined. `glossary list --json` walks the file and counts populated entries; `total_terms == 0` for a husk. + +When `DOC_AWARE=1`, the four behaviors below layer onto the standard interview workflow. When `DOC_AWARE=0`, the interview proceeds exactly as today. + ## Detect Input Type 1. **Flow epic ID pattern**: matches `fn-\d+(-[a-z0-9-]+)?` (e.g., fn-1-add-oauth, fn-12, fn-2-fix-login-bug) @@ -138,10 +185,145 @@ Example flow: Before every question, classify it via the [questions.md](questions.md) **Pre-Question Taxonomy**: - **Codebase-answerable** ("what exists / how it's wired / what conventions live here") → use Read / Grep / Glob to answer; log to spec's `## Resolved via Codebase` section with file:line evidence. +- **Glossary-lookup-answerable** (`DOC_AWARE=1` only) — terms with a canonical entry in the nearest-ancestor `GLOSSARY.md` → silently resolve from the entry; log to spec's `## Glossary Conflicts` section only when the user's wording diverges from canonical AND the term is load-bearing (see behavior (a) below). - **User-judgment-required** ("what should exist / what tradeoff to make / what priority") → ask via `request_user_input`. 
If you find yourself answering a "should" question via grep, that's the bug. Stop and ask the user. +#### Code-versus-assertion contradiction (`DOC_AWARE=1` — behavior (c)) + +When grep / Read reveals the code disagrees with something the user asserted ("we already have X at path Y" but Y is gone, or "the auth flow uses OAuth" but the code uses API keys), do **not** silently log under `## Resolved via Codebase`. Surface the contradiction as a `request_user_input` question: + +- **header**: `Code mismatch?` +- **body**: `Code shows <finding> at <file:line>; you said <assertion>. Recommended: <option>. Confidence: [<tier>].` +- **options**: frozen — `match-code` (revise spec to align with what's there), `update-code` (treat the assertion as the goal; flag the divergence as a task), `clarify` (user explains; agent re-investigates with new context). + +Confidence tier: `[high]` when grep evidence is unambiguous (file does not exist, function signature is clearly different); `[judgment-call]` when interpretation is at play (similar names, partial overlap, recent rename). Never silently pick a side — the user owns the resolution. + +The bar for surfacing: a meaningful contradiction that affects spec correctness. If the user says "the validator returns boolean" and grep shows it returns `Result`, surface. If the user paraphrases a function's role and grep shows the role matches but the implementation differs in unrelated detail, log under `## Resolved via Codebase` and move on. + +## Doc-aware behaviors (`DOC_AWARE=1` only) + +When `DOC_AWARE=1`, four behaviors layer onto the standard interview workflow. When `DOC_AWARE=0`, skip this entire section. + +### Behavior (a) — Phase-zero glossary scan + +Before drafting the first question batch, run a glossary scan against the user's request.
+ +```bash +"$FLOWCTL" glossary list --json +``` + +JSON shape: + +```json +{ + "groups": [ + { + "path": "GLOSSARY.md", + "entries": [ + { "term": "Worker", "definition": "...", "avoid": ["consumer"], "relates_to": ["Queue"] } + ], + "count": 1 + } + ], + "file_count": 1, + "total_terms": 1 +} +``` + +For each defined term across `groups[].entries`, scan the user's request for occurrences. Term match is **case-insensitive whitespace-collapsed** — the same rule as `flowctl glossary read` (see `_glossary_term_matches` in `flowctl.py:401`). Do NOT reinvent matching logic; the canonical contract is "lowercase both sides, collapse runs of whitespace to single space, compare equal." Alias hits via `entries[].avoid`: if the user wrote `consumer` and the entry's `avoid` list contains `consumer`, that's a canonical-mismatch hit on `Worker`. + +For each hit, evaluate one filter before surfacing: + +- **Is the term load-bearing for this spec?** Casual passing mention does not trigger; mention that defines behavior or shapes acceptance does. The user wrote "the worker fetches the queue" mid-sentence about deployment — passing mention, no question. The user wrote "we need a new kind of worker that processes batches" — load-bearing, surface. + +When a hit passes the load-bearing filter AND the user's wording conflicts with canonical (alias used instead of canonical, or definition contradicts), surface as the **first interview question** via `request_user_input`: + +- **header**: `Term mismatch?` +- **body**: `You used ""; GLOSSARY.md defines "" as "". Recommended: . 
Confidence: [].` +- **options**: frozen — `use-canonical` (the user meant the existing term; spec uses canonical wording), `redefine` (user is updating the term meaning; spec proceeds with new wording, agent will re-write `GLOSSARY.md` via `flowctl glossary add` after the interview), `this-is-different` (the words collide but the concepts differ; spec uses a fresh disambiguating term — capture in `## Glossary Conflicts`). + +Confidence tier: `[high]` when the canonical entry is recent and the user's wording cleanly maps to an `avoid` alias; `[judgment-call]` when meaning could plausibly have drifted; `[your-call]` when the term sits in user-domain territory the agent has no purchase on. + +**Throttle:** at most one Phase-zero glossary question per interview turn. If multiple terms hit, surface the most load-bearing one first; the rest fold into the natural conversation flow as they come up. Bombarding the user with vocabulary questions before the core spec questions is the failure mode this filter prevents. + +### Behavior (b) — Fuzzy-term sharpening + +Across the conversation, watch for overloaded language — words the user keeps using whose meaning could plausibly shift between turns ("workflow", "session", "task" when a Flow `task` already has meaning, etc.). When you spot one: + +1. Propose a canonical via `request_user_input`: + - **header**: `Sharpen ""?` + - **body**: `You've used "" in turns. I'm reading it as "" but want to lock it in. Recommended: . Confidence: [].` + - **options**: 2-4 candidate canonical wordings + `none-of-these` (user provides their own). + +2. On user-pick, build the resolved entry and write it to the nearest-ancestor `GLOSSARY.md` via `flowctl glossary add`: + + ```bash + "$FLOWCTL" glossary add "" --definition-file - --json < + EOF + ``` + + Use `--definition-file -` (stdin) so multi-sentence definitions and quoted phrasing round-trip cleanly. 
`glossary add` is upsert — case-insensitive match replaces the existing entry in full; new terms append at the end of the file. If the user picked `redefine` in behavior (a), this is the same call site (one path, one upsert). + +3. The next question can re-read the glossary. There is no in-memory cache to invalidate — re-read on every doc-aware turn that needs canonical lookup. The cost is one stat + one file read per turn; sub-millisecond at typical sizes. + +**When to skip behavior (b):** if a term is single-use, or if the user volunteered a clear definition the first time they used it, or if the conversation is short enough (≤6 turns) that consolidation buys nothing yet. The behavior triggers when overloading is real and persistent, not on every undefined word. + +### Behavior (d) — Decision-record write (three-criteria gate) + +When the interview surfaces a choice the user is making — not just a fact about the system, a real **decision** — evaluate the three-criteria gate before drafting a memory entry. + +**The three-criteria gate** (all three must hold): + +1. **Hard-to-reverse** — undoing this later costs more than redoing it now. Schema choices, public API shapes, integration boundaries qualify; cosmetic preferences and easily-toggled flags do not. +2. **Surprising-without-context** — a future maintainer reading the result without history would ask "why this and not the obvious thing?". Anything that follows the standard pattern of the surrounding code is not surprising. +3. **Real trade-off** — there was a genuine alternative that lost. If there was no real alternative, it isn't a decision; it's a fact. + +If any of the three fails, do NOT write a decision entry. Note the choice in the spec's prose body (e.g. `## Decision Context`) and move on. The bar exists because the decisions store decays fast when filled with non-decisions. + +When all three hold: + +1. **Draft the entry** in agent memory (do not write yet). 
Shape: + - **Title** (1 line, ≤80 chars): the decision in noun-phrase form (e.g. "Nearest-ancestor walk for glossary lookup"). + - **Body** (1-3 sentences floor; longer when warranted): + - 1 sentence on what was chosen. + - 0-1 sentences on why. + - Optional `## Considered Options` block listing rejected alternatives with one-line reasons each. + - Optional `## Consequences` block listing what this commits the project to. + - **Module** (optional): the file or subsystem the decision shapes. + - **Tags** (optional): comma-separated, e.g. `glossary,resolution,walk`. + +2. **Show the draft via `request_user_input` before writing** — same pattern as `/flow-next:capture` Phase 4 read-back: + - **header**: `Write decision?` + - **body**: `Drafted decision entry: . Body: <one-line summary>. Recommended: approve — <one-sentence rationale why all three gate criteria hold>. Confidence: [<tier>].` + - **options**: frozen — `approve` (write), `edit` (user revises title / body / module / tags via follow-up), `skip` (do not write; the choice stays in spec prose only). + + Show the full body inline in the question or in the message preceding it; the user must be able to read what they're approving. Never write silently — even when the gate cleanly passes, the user owns the final write. + +3. **On `approve`**, call: + + ```bash + "$FLOWCTL" memory add \ + --track knowledge \ + --category decisions \ + --title "<title>" \ + --module "<module>" \ + --tags "<tags>" \ + --body-file - <<EOF + <body markdown> + EOF + ``` + + The `decisions` category is registered in flowctl's memory schema (Task 1 of this epic). Optional fields `--decision-status` (default `accepted`), `--superseded-by`, and `--alternatives-considered` are available; pass them when the conversation supplies them and skip otherwise. + +4. **On `edit`**, ask one follow-up `request_user_input` for which field changes (title / body / module / tags), capture the revision, re-show the draft, loop. 
Hard cap at 2 edit cycles before defaulting to `approve` / `skip`. + +5. **On `skip`**, do nothing — the choice still appears in spec prose; only the memory entry is suppressed. + +**At most one decision write per interview turn.** Even if multiple gate-passing decisions surface, ask one at a time; subsequent asks adapt to the user's energy level for read-back. + ## Question Categories Read [questions.md](questions.md) for all question categories and interview guidelines. @@ -182,6 +364,10 @@ Decisions made during interview (e.g., "Use OAuth not SAML", "Support mobile + w (optional — omit if nothing was resolved this way during the interview) Items the agent answered via Read / Grep / Glob, with file:line evidence. Separate from items the user answered. Lets reviewers spot-check assumptions later. +## Glossary Conflicts +(optional — only when DOC_AWARE=1 surfaced behavior-(a) hits during the interview) +Per-term: user-wording vs. canonical term, the resolution chosen (use-canonical / redefine / this-is-different), file:line of the canonical entry. Lets reviewers see where vocabulary tightened. + ## Open Questions Unresolved items that need research during planning @@ -222,6 +408,10 @@ Decisions made during interview (optional — omit if nothing was resolved this way during the interview) Items the agent answered via Read / Grep / Glob, with file:line evidence. Separate from items the user answered. +## Glossary Conflicts +(optional — only when DOC_AWARE=1 surfaced behavior-(a) hits during the interview) +Per-term: user-wording vs. canonical term, the resolution chosen, file:line of the canonical entry. 
+ ## Open Questions Unresolved items @@ -278,6 +468,7 @@ Show summary: - Number of questions asked - Key decisions captured - What was written (Flow ID updated / file rewritten) +- Doc-aware mode (when `DOC_AWARE=1` was active): glossary terms added/updated via `flowctl glossary add`, decision entries written via `flowctl memory add --track knowledge --category decisions`, glossary conflicts captured under `## Glossary Conflicts` Suggest next step based on input type: - New idea / epic without tasks → `/flow-next:plan fn-N` diff --git a/plugins/flow-next/codex/skills/flow-next-interview/questions.md b/plugins/flow-next/codex/skills/flow-next-interview/questions.md index 93e93405..daec782b 100644 --- a/plugins/flow-next/codex/skills/flow-next-interview/questions.md +++ b/plugins/flow-next/codex/skills/flow-next-interview/questions.md @@ -4,21 +4,27 @@ Ask NON-OBVIOUS questions only. Expect 40+ questions for complex specs. ## Pre-Question Taxonomy -Before asking any question, classify it: +Before asking any question, classify it into one of three categories: | Category | Who answers | Examples | |----------|-------------|----------| | **Codebase-answerable** | Agent (Read / Grep / Glob) | "What persistence layer is used?" / "Where do existing routes live?" / "What's the test framework?" | +| **Glossary-lookup-answerable** (`DOC_AWARE=1` only) | Agent (`flowctl glossary read`) | "What does this project mean by 'worker'?" / "Is 'session' the canonical term here, or is it 'connection'?" | | **User-judgment-required** | User (`request_user_input`) | "Should we add caching?" / "What's the priority for offline support?" / "Is performance or simplicity more important here?" | -**Rule of thumb:** +**Rules of thumb:** -- "What exists / how is it wired / what conventions live here" → agent investigates, doesn't ask. +- "What exists / how is it wired / what conventions live here" → agent investigates the codebase, doesn't ask. +- "What does the project's canonical vocabulary call this?"
→ agent looks up the nearest-ancestor `GLOSSARY.md` (when `DOC_AWARE=1`) and surfaces a question only when (1) the user's wording conflicts with canonical AND the term is load-bearing (behavior (a) — phase-zero scan), or (2) no canonical entry exists and the term is overloaded (behavior (b) — fuzzy-term sharpening). - "What should exist / what tradeoff to make / what priority" → user decides, agent asks. **If you find yourself answering a "should" question via grep, that's the bug.** Stop and ask the user. -**Audit trail:** every question the agent answered via codebase exploration goes into the spec's `## Resolved via Codebase` section (separate from items the user answered). Cite file:line evidence so reviewers can spot-check assumptions later — especially important when the agent's "I checked" turns out to be "I assumed." +**Audit trail:** + +- Codebase-resolved items → `## Resolved via Codebase` section with file:line evidence. +- Glossary-conflict-resolved items (when behavior (a) fired) → `## Glossary Conflicts` section with the user-wording, canonical term, and resolution. +- Both sections are separate from items the user answered. Cite evidence so reviewers can spot-check assumptions later — especially important when the agent's "I checked" turns out to be "I assumed." ## Technical Implementation diff --git a/plugins/flow-next/codex/skills/flow-next-sync/SKILL.md b/plugins/flow-next/codex/skills/flow-next-sync/SKILL.md index da9b4e5f..de64d229 100644 --- a/plugins/flow-next/codex/skills/flow-next-sync/SKILL.md +++ b/plugins/flow-next/codex/skills/flow-next-sync/SKILL.md @@ -95,7 +95,24 @@ No downstream tasks to sync (all done or none exist). ``` Stop here (success, nothing to do).
-### Step 5: Spawn Plan-Sync Agent +### Step 5: Gather glossary + decisions context + +Two extra context types help the agent catch drift the spec text alone can't reveal: project-glossary terms (renames where the old spec used a term whose `_Avoid_` alias now appears in code) and active decision constraints (current code may touch files mentioned in a decision's `Consequences` section). + +```bash +GLOSSARY_JSON="$("$FLOWCTL" glossary list --json 2>/dev/null \ + || echo '{"groups":[],"file_count":0,"total_terms":0}')" +DECISIONS_JSON="$("$FLOWCTL" memory list --track knowledge --category decisions --json 2>/dev/null \ + || echo '{"entries":[],"legacy":[],"count":0,"status":"active"}')" +``` + +Both calls are best-effort — empty defaults keep the agent prompt valid when flowctl returns nothing or fails. + +When `GLOSSARY_JSON` reports `file_count == 0` AND `DECISIONS_JSON` reports `count == 0`, skip the extra context (pass the empty defaults — the agent treats them as a no-op signal). + +When `GLOSSARY_JSON.total_terms == 0` but `file_count > 0`, every group is a husk. Husks carry no signal for drift detection — pass the JSON through untouched and let the agent skip them. + +### Step 6: Spawn Plan-Sync Agent Build context and spawn via Task tool: @@ -108,16 +125,19 @@ EPIC_ID: <epic id> DOWNSTREAM_TASK_IDS: <comma-separated list from step 4> DRY_RUN: <true|false> +GLOSSARY_JSON: <output of `flowctl glossary list --json` from step 5> +DECISIONS_JSON: <output of `flowctl memory list --track knowledge --category decisions --json` from step 5> + <if DRY_RUN is true> DRY RUN MODE: Report what would change but do NOT use Edit tool. Only analyze and report drift. </if> ``` -Use Task tool with `subagent_type: flow-next:plan-sync` +Use Task tool with `subagent_type: flow-next:plan-sync`. **Note:** `COMPLETED_TASK_ID` is always provided - for task-mode it's the input task, for epic-mode it's the source task selected in Step 4. 
-### Step 6: Report Results +### Step 7: Report Results After agent returns, format output: diff --git a/plugins/flow-next/commands/flow-next/interview.md b/plugins/flow-next/commands/flow-next/interview.md index b6712c90..c77af878 100644 --- a/plugins/flow-next/commands/flow-next/interview.md +++ b/plugins/flow-next/commands/flow-next/interview.md @@ -1,7 +1,7 @@ --- name: flow-next:interview description: Interview & refine an epic, task, or spec file in-depth -argument-hint: "[epic ID, task ID, or file path]" +argument-hint: "[epic ID, task ID, or file path] [--docs | --no-docs]" --- # IMPORTANT: This command MUST invoke the skill `flow-next-interview` @@ -11,3 +11,16 @@ The ONLY purpose of this command is to call the `flow-next-interview` skill. You **User input:** $ARGUMENTS Pass the user input to the skill. The skill handles the interview logic. + +## Optional flags + +- `--docs` — force doc-aware mode on. The interview reads the nearest-ancestor `GLOSSARY.md` and `.flow/memory/knowledge/decisions/`, surfaces glossary conflicts, sharpens overloaded terms via `flowctl glossary add`, and writes decision entries via `flowctl memory add --track knowledge --category decisions ...` when the three-criteria gate passes. If no `GLOSSARY.md` exists yet, the first resolved term lazy-creates one at the repo root. +- `--no-docs` — force doc-aware mode off, even when `GLOSSARY.md` or decision entries exist. + +Without either flag, doc-aware mode autodetects: it activates when `GLOSSARY.md` has at least one defined term (`flowctl glossary list --json` reports `total_terms > 0`) OR `.flow/memory/knowledge/decisions/` has at least one entry. An empty `# Glossary` husk left behind after the last term is removed does not trip autodetect. 
+ +Examples: + +- `/flow-next:interview fn-1-add-oauth` — autodetect doc-aware mode +- `/flow-next:interview fn-1-add-oauth --docs` — force doc-aware on +- `/flow-next:interview fn-1-add-oauth --no-docs` — force doc-aware off diff --git a/plugins/flow-next/docs/flowctl.md b/plugins/flow-next/docs/flowctl.md index 35a59bec..70be9b81 100644 --- a/plugins/flow-next/docs/flowctl.md +++ b/plugins/flow-next/docs/flowctl.md @@ -7,7 +7,7 @@ CLI for `.flow/` task tracking. Agents must use flowctl for all writes. ## Available Commands ``` -init, detect, status, config, review-backend, memory, prospect, +init, detect, status, config, review-backend, memory, prospect, glossary, epic, task, dep, show, epics, tasks, list, cat, ready, next, start, done, block, state-path, migrate-state, validate, triage-skip, checkpoint, prep-chat, @@ -504,6 +504,10 @@ Manage persistent learnings under `.flow/memory/`. **Schema (v0.33.0+):** Categorized YAML — one entry per file under `bug/<category>/*.md` or `knowledge/<category>/*.md`. Frontmatter: `title`, `date`, `track`, `category`, `module`, `tags`, plus track-specific fields (`problem_type` / `root_cause` / `resolution_type` for `bug`; `applies_when` for `knowledge`). Optional `status: active|stale`, `last_audited`, `audit_notes`. +**Knowledge categories:** `architecture-patterns`, `conventions`, `tooling-decisions`, `workflow`, `best-practices`, `decisions` (the last shipped in 0.39.0 for load-bearing architectural choices). Decision entries may add three optional fields: `decision_status` (enum: `proposed | accepted | superseded`), `superseded_by` (id reference), `alternatives_considered` (comma-separated on the CLI via `--alternatives-considered`; stored as a list). Body convention: 1–3 sentence floor describing trade-offs, irreversibility, and surprise factor. + +**Bug categories:** `build-errors`, `test-failures`, `runtime-errors`, `performance`, `security`, `integration`, `data`, `ui`. + Enable: `flowctl config set memory.enabled true`. Then `flowctl memory init`.
```bash @@ -612,6 +616,62 @@ flowctl prospect archive <artifact-id> [--json] Exit codes: corrupt artifact on `read`/`promote` → 3 (stderr `[ARTIFACT CORRUPT: <reason>]`); duplicate idea on `promote` without `--force` → 2; Ralph-block (`REVIEW_RECEIPT_PATH` / `FLOW_RALPH=1`) on `/flow-next:prospect` → 2. +### glossary + +Manage `GLOSSARY.md` — the project's canonical terminology file. Lives at the **repo root** (and optionally subdirectories), NOT inside `.flow/`. Survives `rm -rf .flow/` (R18 — terminology is the project's, not flow-next's). + +**Format:** H2-per-term markdown aligned with `open-gitops/documents` and `glossarify-md` so generic markdown tooling reads it cleanly. + +**Resolution:** Nearest-ancestor walk from cwd up to repo root, first match wins (same shape as `tsconfig.json` / EditorConfig). The walk is capped at 32 levels (constant: `GLOSSARY_WALK_MAX_DEPTH`) and also stops at a filesystem (`st_dev`) boundary; the depth cap is the guard against pathological paths — there is no separate cycle detection. Fenced code blocks inside definitions are masked during parse so example terms in code don't get parsed as headings. + +```bash +# Add or update a term — single-line definition +flowctl glossary add <term> --definition "Short definition." [--json] + +# Add or update a term — multi-line definition from a file +flowctl glossary add <term> --definition-file body.md [--json] + +# Add or update a term — multi-line definition from stdin +flowctl glossary add <term> --definition-file - [--json] + +# Optional alias / cross-reference flags (comma-separated) +flowctl glossary add <term> --definition "..."
\ + --avoid "alt1,alt2" # rendered as `_Avoid_:` italic line + --relates-to "x,y" # rendered as `_Relates to_:` italic line + +# List defined terms across every GLOSSARY.md on the ancestor chain (nearest first) +flowctl glossary list [--json] + +# Read a term — walks ancestors, first match wins +flowctl glossary read <term> [--json] + +# Remove a term — last-term remove leaves an `# Glossary` H1 husk on disk (R18) +flowctl glossary remove <term> [--json] +``` + +**JSON shapes:** + +`glossary list --json`: +```json +{ + "success": true, + "groups": [ + {"path": "GLOSSARY.md", "entries": [{"term": "Epic", "definition": "...", "avoid": [], "relates_to": []}], "count": 1} + ], + "file_count": 1, + "total_terms": 1 +} +``` + +`glossary read --json`: +```json +{"success": true, "path": "GLOSSARY.md", "term": "Epic", "definition": "...", "avoid": [], "relates_to": []} +``` + +**Husk semantics:** Last-term `remove` leaves a `# Glossary` H1 husk — the file is never deleted (R18). Doc-aware autodetect should branch on `total_terms > 0` (or `file_count > 0` and any group's `count > 0`), not on `[[ -f GLOSSARY.md ]]` — the latter would falsely activate doc-aware mode on an empty husk. + +**Helpers (Python imports):** Downstream skills should call the subcommands rather than reimplementing parsing, but the building blocks are exposed for ad-hoc reuse: `find_nearest_glossary` / `find_all_glossaries` / `parse_glossary_file` / `render_glossary_file` / `validate_glossary_entry` / `_glossary_term_matches` / `_glossary_strip_fenced_code`. Constants: `GLOSSARY_FILE` (`"GLOSSARY.md"`), `GLOSSARY_WALK_MAX_DEPTH` (`32`). + ### triage-skip Trivial-diff fast path that bypasses the configured review backend on whitelisted diffs (lockfile-only, docs-only, release-chore, generated-file-only). Returns `VERDICT=SHIP` deterministically. 
diff --git a/plugins/flow-next/scripts/ci_test.sh b/plugins/flow-next/scripts/ci_test.sh index 3f18823b..5915d221 100755 --- a/plugins/flow-next/scripts/ci_test.sh +++ b/plugins/flow-next/scripts/ci_test.sh @@ -171,6 +171,13 @@ echo -e "\n${YELLOW}--- Memory System ---${NC}" flowctl memory init --json >/dev/null && pass "memory init" || fail "memory init" +# fn-38 T1: lazy-dir-create — `flowctl memory init` must materialize +# `.flow/memory/knowledge/decisions/.gitkeep` (the directory loop walks +# MEMORY_CATEGORIES, so the new `decisions` slot is auto-created). +[[ -f "$TEST_DIR/.flow/memory/knowledge/decisions/.gitkeep" ]] && \ + pass "memory init lazy-creates decisions/.gitkeep" || \ + fail "memory init missing decisions/.gitkeep" + flowctl memory add --type pitfall "Never use sync IO in async handlers" --json >/dev/null && pass "memory add pitfall" || fail "memory add pitfall" flowctl memory add --type convention "Use snake_case for functions" --json >/dev/null && pass "memory add convention" || fail "memory add convention" @@ -179,6 +186,207 @@ MEM_LIST="$(flowctl memory list --json)" MEM_TOTAL="$("$PYTHON_BIN" -c "import json,sys; d=json.load(sys.stdin); print(d.get('count', 0))" <<< "$MEM_LIST")" [[ "$MEM_TOTAL" -ge 2 ]] && pass "memory list ($MEM_TOTAL total)" || fail "memory list (got $MEM_TOTAL)" +# ───────────────────────────────────────────────────────────────────────────── +# 5b. 
Memory: decisions track (fn-38 task 1) +# ───────────────────────────────────────────────────────────────────────────── +echo -e "\n${YELLOW}--- Memory: decisions track ---${NC}" + +# (1) round-trip write→read of all three optional fields +DEC_JSON="$(flowctl memory add \ + --track knowledge --category decisions \ + --title "Use nearest-ancestor for glossary lookup" \ + --module flowctl \ + --tags "glossary,resolution" \ + --decision-status accepted \ + --superseded-by "knowledge/decisions/foo-2026-04-30" \ + --alternatives-considered "always-root,explicit-config,meta-file" \ + --json)" +DEC_ID="$("$PYTHON_BIN" -c "import json,sys; print(json.load(sys.stdin)['entry_id'])" <<< "$DEC_JSON")" +[[ -n "$DEC_ID" ]] && pass "memory add decisions ($DEC_ID)" || fail "memory add decisions" + +DEC_PATH="$("$PYTHON_BIN" -c "import json,sys; print(json.load(sys.stdin)['path'])" <<< "$DEC_JSON")" +[[ -f "$DEC_PATH" ]] && pass "decisions entry written to disk" || fail "decisions entry missing on disk" + +# Round-trip: parse the file we wrote, verify all three optional fields survived. +"$PYTHON_BIN" - "$DEC_PATH" << 'PYTEST' +import sys +from pathlib import Path +text = Path(sys.argv[1]).read_text(encoding="utf-8") +# Crude frontmatter splitter — the schema writes flat `key: value` so a +# line-by-line scan is enough for the round-trip assertion. 
+fm: dict[str, str] = {} +in_fm = False +for line in text.splitlines(): + if line.strip() == "---": + if in_fm: + break + in_fm = True + continue + if not in_fm or ":" not in line: + continue + k, _, v = line.partition(":") + fm[k.strip()] = v.strip() + +errors = [] +if fm.get("decision_status") != "accepted": + errors.append(f"decision_status round-trip: got {fm.get('decision_status')!r}") +if fm.get("superseded_by") != "knowledge/decisions/foo-2026-04-30": + errors.append(f"superseded_by round-trip: got {fm.get('superseded_by')!r}") +alts = fm.get("alternatives_considered") or "" +if not (alts.startswith("[") and alts.endswith("]")): + errors.append(f"alternatives_considered should be inline-list flow style, got {alts!r}") +elif "always-root" not in alts or "explicit-config" not in alts or "meta-file" not in alts: + errors.append(f"alternatives_considered missing items: {alts!r}") + +if errors: + for e in errors: + print(" -", e) + sys.exit(1) +print("decisions round-trip OK") +PYTEST +[[ $? -eq 0 ]] && pass "decisions optional fields round-trip" || fail "decisions optional fields round-trip" + +# (3) deterministic write order across repeated read+write cycles. Capture the +# first frontmatter block, parse + write_memory_entry the same dict, compare +# byte-for-byte. Field order is anchored by MEMORY_FIELD_ORDER; rerunning +# write_memory_entry on the same dict must produce the same bytes. +"$PYTHON_BIN" - "$TEST_DIR" "$DEC_PATH" << 'PYTEST' +import importlib.util +import sys +from pathlib import Path + +test_dir = Path(sys.argv[1]) +dec_path = Path(sys.argv[2]) +spec = importlib.util.spec_from_file_location("flowctl", test_dir / "scripts/flowctl.py") +flowctl = importlib.util.module_from_spec(spec) +spec.loader.exec_module(flowctl) + +parsed = flowctl.parse_memory_frontmatter(dec_path) +# parse → re-write → compare. write_memory_entry must produce identical +# bytes across repeated cycles (deterministic field order anchored by +# MEMORY_FIELD_ORDER). 
Body is empty for this fixture. +body = "" + +# Round-trip 1 +flowctl.write_memory_entry(dec_path, parsed, body) +pass1 = dec_path.read_text(encoding="utf-8") + +# Round-trip 2 — re-parse what we just wrote, write again. Idempotency check. +parsed2 = flowctl.parse_memory_frontmatter(dec_path) +flowctl.write_memory_entry(dec_path, parsed2, body) +pass2 = dec_path.read_text(encoding="utf-8") + +errors = [] +if pass1 != pass2: + errors.append("write_memory_entry produced different bytes across repeated cycles") +# Sanity: the optional-field block must appear in MEMORY_FIELD_ORDER order +# (decision_status before superseded_by before alternatives_considered). +ds = pass2.find("decision_status:") +sb = pass2.find("superseded_by:") +ac = pass2.find("alternatives_considered:") +if not (0 <= ds < sb < ac): + errors.append( + f"decision-fields out of order: ds={ds} sb={sb} ac={ac}" + ) + +if errors: + for e in errors: + print(" -", e) + sys.exit(1) +print("decisions deterministic write order OK") +PYTEST +[[ $? -eq 0 ]] && pass "decisions deterministic write order" || fail "decisions deterministic write order" + +# (2) negative case — `decision_status` outside the enum must be rejected. +# argparse enforces `choices`, so this is a usage-error (rc=2) caught before +# cmd_memory_add runs. Belt + braces: the validator also enum-checks, so we +# verify a hand-crafted dict is rejected by validate_memory_frontmatter. +set +e +flowctl memory add --track knowledge --category decisions \ + --title "Bad status" --decision-status pending --json >/dev/null 2>&1 +BAD_RC=$? 
+set -e +[[ $BAD_RC -ne 0 ]] && pass "decision_status rejects out-of-enum (cli)" || fail "decision_status should reject 'pending'" + +"$PYTHON_BIN" - "$TEST_DIR" << 'PYTEST' +import importlib.util +import sys +from pathlib import Path + +test_dir = Path(sys.argv[1]) +spec = importlib.util.spec_from_file_location("flowctl", test_dir / "scripts/flowctl.py") +flowctl = importlib.util.module_from_spec(spec) +spec.loader.exec_module(flowctl) + +errors = flowctl.validate_memory_frontmatter({ + "title": "Bad", + "date": "2026-04-30", + "track": "knowledge", + "category": "decisions", + "applies_when": "Bad", + "decision_status": "pending", +}) +if not any("decision_status" in e for e in errors): + print(f"validator should reject decision_status='pending', got {errors!r}") + sys.exit(1) + +# Sanity: valid status passes. +errors = flowctl.validate_memory_frontmatter({ + "title": "Good", + "date": "2026-04-30", + "track": "knowledge", + "category": "decisions", + "applies_when": "Good", + "decision_status": "proposed", +}) +if errors: + print(f"validator should accept decision_status='proposed', got {errors!r}") + sys.exit(1) + +print("decision_status enum validation OK") +PYTEST +[[ $? -eq 0 ]] && pass "decision_status enum validator" || fail "decision_status enum validator" + +# ───────────────────────────────────────────────────────────────────────────── +# 5c. Plugin-source hygiene — R17 forbidden vocabulary + R4 meta-file leaks +# (fn-38 task 7). Two-tier guard mirrors the existing +# AskUserQuestion / ToolSearch split: this canonical scan covers +# skills/, agents/, commands/, and flowctl.py; the codex mirror scan +# lives in scripts/sync-codex.sh validation block. +# ───────────────────────────────────────────────────────────────────────────── +echo -e "\n${YELLOW}--- Plugin-source hygiene (R17 + R4) ---${NC}" + +# R17: DDD vocabulary guard. Listed inline only inside the grep pattern; +# documentation refers to "the R17 forbidden list" without enumeration. 
+set +e +DDD_HITS="$(grep -RnE 'ubiquitous language|bounded context|domain expert|aggregate root' \ + "$PLUGIN_ROOT/skills" \ + "$PLUGIN_ROOT/scripts/flowctl.py" \ + "$PLUGIN_ROOT/agents" \ + "$PLUGIN_ROOT/commands" 2>/dev/null)" +set -e +if [[ -n "$DDD_HITS" ]]; then + fail "R17 DDD vocabulary in canonical:" + echo "$DDD_HITS" | sed 's/^/ /' +else + pass "R17: no DDD vocabulary in canonical" +fi + +# R4: no meta-file precedent leaks (early-design naming) into canonical prose. +set +e +META_HITS="$(grep -RnE 'GLOSSARY-MAP\.md|CONTEXT-MAP\.md' \ + "$PLUGIN_ROOT/skills" \ + "$PLUGIN_ROOT/scripts/flowctl.py" \ + "$PLUGIN_ROOT/agents" \ + "$PLUGIN_ROOT/commands" 2>/dev/null)" +set -e +if [[ -n "$META_HITS" ]]; then + fail "R4 meta-file refs in canonical:" + echo "$META_HITS" | sed 's/^/ /' +else + pass "R4: no meta-file refs in canonical" +fi + # ───────────────────────────────────────────────────────────────────────────── # 6. Symbol Extraction # ───────────────────────────────────────────────────────────────────────────── diff --git a/plugins/flow-next/scripts/flowctl.py b/plugins/flow-next/scripts/flowctl.py index 3a88efc3..21bf4efe 100755 --- a/plugins/flow-next/scripts/flowctl.py +++ b/plugins/flow-next/scripts/flowctl.py @@ -59,6 +59,10 @@ def _flock(f, lock_type): PROSPECTS_ARCHIVE_DIR = "_archive" CONFIG_FILE = "config.json" +# Glossary (fn-38.2): repo-root + nearest-ancestor markdown file. +GLOSSARY_FILE = "GLOSSARY.md" +GLOSSARY_WALK_MAX_DEPTH = 32 # Defensive cap against pathological symlinks. + EPIC_STATUS = ["open", "done"] TASK_STATUS = ["todo", "in_progress", "blocked", "done"] @@ -109,6 +113,298 @@ def ensure_flow_exists() -> bool: return get_flow_dir().exists() +# --- Glossary helpers (fn-38.2) --- +# +# `GLOSSARY.md` is a plain markdown file with H2-per-term sections. It lives +# at the repo root (project state, not flow-next bookkeeping — survives a +# `rm -rf .flow/`). 
Subdirectory `GLOSSARY.md` files are supported via +# nearest-ancestor resolution (tsconfig pattern, bounded at the git repo +# root per gitignore convention). +# +# File shape: +# +# # Glossary +# +# ## Term Name +# One or more paragraphs of definition. +# +# _Avoid_: alias one, alias two +# +# _Relates to_: [Other Term](#other-term) +# +# Parser invariants: +# - Fenced code blocks are stripped before heading scan (so `## not a heading` +# inside a fence is not picked up). +# - CRLF normalized to LF on parse. +# - H2 lines (`^## term$`) anchor each entry; everything between this H2 +# and the next H2 (or EOF) is the entry body. +# - `_Avoid_:` italic line inside an entry yields the alias list. +# - `_Relates to_:` italic line yields the relationships list (raw lines +# preserved verbatim — anchor links survive a roundtrip). +# - Last-term-removal leaves a `# Glossary` husk; never delete the file +# (Constraints: project state). + + +def find_nearest_glossary(start: Optional[Path] = None) -> Optional[Path]: + """Return the nearest-ancestor `GLOSSARY.md` from `start` (default: cwd). + + Walks `start` → `start.parent` → ... until either: + * a `GLOSSARY.md` file exists in the current directory (return it), or + * the git repo root is reached (check the root, then stop), or + * the filesystem `st_dev` changes (boundary crossed, stop), or + * `GLOSSARY_WALK_MAX_DEPTH` levels traversed (defensive cap). + + Symlinks are NOT manually followed: `Path.parent` is purely lexical; + the kernel handles `ELOOP` if the caller has resolved a path through + symlinks. Walks via `Path.parent` only (no `.resolve()` per step) so + we don't accidentally chase symlinks across the filesystem. + """ + cwd = (start or Path.cwd()).resolve() + + # Anchor: where the walk must terminate. + try: + repo_root = get_repo_root().resolve() + except Exception: + repo_root = cwd # No git → walk a single dir then stop. 
+ + try: + start_dev = cwd.stat().st_dev + except OSError: + return None + + current = cwd + for _ in range(GLOSSARY_WALK_MAX_DEPTH): + candidate = current / GLOSSARY_FILE + try: + if candidate.is_file(): + return candidate + except OSError: + pass + + # Reached repo root: check the root file (already done above) and stop. + if current == repo_root: + return None + + # Don't ascend past the git repo root. + try: + if repo_root not in current.parents: + return None + except Exception: + return None + + parent = current.parent + if parent == current: + return None # Filesystem root. + + # Boundary check: don't cross filesystems. + try: + if parent.stat().st_dev != start_dev: + return None + except OSError: + return None + + current = parent + + return None # Hit the depth cap — defensive, treat as "not found". + + +def find_all_glossaries(start: Optional[Path] = None) -> list[Path]: + """Return every `GLOSSARY.md` on the ancestor chain (nearest first). + + Used by `glossary list` to group by file when multiple glossaries are + present. Bounded the same way as `find_nearest_glossary` (repo root, + `st_dev`, depth cap). 
+ """ + cwd = (start or Path.cwd()).resolve() + try: + repo_root = get_repo_root().resolve() + except Exception: + repo_root = cwd + + try: + start_dev = cwd.stat().st_dev + except OSError: + return [] + + found: list[Path] = [] + current = cwd + for _ in range(GLOSSARY_WALK_MAX_DEPTH): + candidate = current / GLOSSARY_FILE + try: + if candidate.is_file(): + found.append(candidate) + except OSError: + pass + + if current == repo_root: + break + try: + if repo_root not in current.parents: + break + except Exception: + break + parent = current.parent + if parent == current: + break + try: + if parent.stat().st_dev != start_dev: + break + except OSError: + break + current = parent + return found + + +# --- Glossary parse / render --- + +_GLOSSARY_FENCE_RE = re.compile(r"```.*?```", re.DOTALL) +_GLOSSARY_HEADING_RE = re.compile(r"^##\s+(.+?)\s*$", re.MULTILINE) +_GLOSSARY_AVOID_RE = re.compile(r"^_Avoid_:\s*(.+?)\s*$", re.MULTILINE) +_GLOSSARY_RELATES_RE = re.compile(r"^_Relates to_:\s*(.+?)\s*$", re.MULTILINE) + + +def _glossary_strip_fenced_code(text: str) -> str: + """Mask each fenced code block byte-for-byte so heading-scan offsets stay + aligned with the original text. + + Each non-newline byte inside a fence becomes a space; newlines are kept + so line numbers don't shift. This means an `^##\\s+...` line *inside* + a fence has its leading `##` blanked, so it is not picked up by the + heading regex, while the masked string remains the same length as the + original (we slice the original text using offsets from the masked + text). + """ + def _blank_replace(m: re.Match) -> str: + return "".join("\n" if c == "\n" else " " for c in m.group(0)) + return _GLOSSARY_FENCE_RE.sub(_blank_replace, text) + + +def parse_glossary_file(text: str) -> list[dict[str, Any]]: + """Parse a `GLOSSARY.md` body into a list of term entries. 
+ + Returns a list of dicts with keys: + - `term` : str — heading text (verbatim, trimmed) + - `definition`: str — body text after the heading, before optional + `_Avoid_:` / `_Relates to_:` italic lines + - `avoid` : list[str] — comma-split aliases (empty list if none) + - `relates_to`: list[str] — `_Relates to_:` line content split on + commas, items trimmed (empty list if none) + + CRLF normalized; fenced code blocks blanked before heading scan so + `## not a heading` inside a fence is not picked up. + """ + if not text: + return [] + # Normalize line endings. + text = text.replace("\r\n", "\n").replace("\r", "\n") + masked = _glossary_strip_fenced_code(text) + + headings = list(_GLOSSARY_HEADING_RE.finditer(masked)) + entries: list[dict[str, Any]] = [] + for i, m in enumerate(headings): + term = m.group(1).strip() + body_start = m.end() + body_end = headings[i + 1].start() if i + 1 < len(headings) else len(text) + body = text[body_start:body_end] + + avoid_match = _GLOSSARY_AVOID_RE.search(body) + relates_match = _GLOSSARY_RELATES_RE.search(body) + + # Strip avoid/relates lines from the definition slice. Both matches + # hold offsets into the untouched `body`, so copy the text between + # the spans in ascending order instead of splicing one span out and + # then reusing the other's (now stale) offsets — that corrupted the + # definition whenever `_Avoid_:` preceded `_Relates to_:`. + spans = sorted( + mt.span() for mt in (avoid_match, relates_match) if mt is not None + ) + kept: list[str] = [] + cursor = 0 + for span_start, span_end in spans: + if span_start > cursor: + kept.append(body[cursor:span_start]) + # max() keeps the cursor monotonic if the spans ever overlap. + cursor = max(cursor, span_end) + kept.append(body[cursor:]) + def_text = "".join(kept) + + avoid: list[str] = [] + if avoid_match is not None: + raw = avoid_match.group(1).strip() + avoid = [a.strip() for a in raw.split(",") if a.strip()] + + relates_to: list[str] = [] + if relates_match is not None: + raw = relates_match.group(1).strip() + relates_to = [r.strip() for r in raw.split(",") if r.strip()] + + entries.append( + { + "term": term, + "definition": def_text.strip("\n").rstrip(), + "avoid": avoid, + "relates_to": relates_to, + } + ) + + return entries + + +def render_glossary_file(entries: list[dict[str, Any]]) -> str: + """Render a glossary entry list back to markdown.
+ + Always emits a leading `# Glossary` H1 husk so an emptied file + (last-term-removal) is still recognizably a glossary file (Constraints + R18: never delete the file). + + Entry order is preserved (caller controls ordering — `add` appends or + updates in place; `remove` deletes by term name). + """ + parts: list[str] = ["# Glossary", ""] + for entry in entries: + term = entry["term"].strip() + parts.append(f"## {term}") + parts.append("") + definition = (entry.get("definition") or "").strip("\n").rstrip() + if definition: + parts.append(definition) + parts.append("") + avoid = entry.get("avoid") or [] + if avoid: + parts.append(f"_Avoid_: {', '.join(avoid)}") + parts.append("") + relates_to = entry.get("relates_to") or [] + if relates_to: + parts.append(f"_Relates to_: {', '.join(relates_to)}") + parts.append("") + + # Single trailing newline; never two. + out = "\n".join(parts).rstrip("\n") + "\n" + return out + + +def validate_glossary_entry(entry: dict[str, Any]) -> list[str]: + """Return validation errors for a single glossary entry (empty = valid). + + Required: `term` (non-empty), `definition` (non-empty). + Optional: `avoid` (list[str]), `relates_to` (list[str]). 
+ """ + errors: list[str] = [] + if not isinstance(entry, dict): + return ["entry must be a dict"] + term = entry.get("term") + if not isinstance(term, str) or not term.strip(): + errors.append("term must be a non-empty string") + definition = entry.get("definition") + if not isinstance(definition, str) or not definition.strip(): + errors.append("definition must be a non-empty string") + avoid = entry.get("avoid", []) + if avoid is not None and not isinstance(avoid, list): + errors.append("avoid must be a list") + relates_to = entry.get("relates_to", []) + if relates_to is not None and not isinstance(relates_to, list): + errors.append("relates_to must be a list") + return errors + + +def _glossary_term_matches(a: str, b: str) -> bool: + """Case-insensitive whitespace-collapsed term comparison.""" + return re.sub(r"\s+", " ", a.strip().lower()) == re.sub( + r"\s+", " ", b.strip().lower() + ) + + def get_state_dir() -> Path: """Get state directory for runtime task state. @@ -3673,6 +3969,7 @@ def cmd_review_backend(args: argparse.Namespace) -> None: "tooling-decisions", "workflow", "best-practices", + "decisions", ], } @@ -3696,6 +3993,12 @@ def cmd_review_backend(args: argparse.Namespace) -> None: {"problem_type", "symptoms", "root_cause", "resolution_type"} ) MEMORY_KNOWLEDGE_FIELDS: frozenset[str] = frozenset({"applies_when"}) +# Decision-specific optional fields. Layered onto knowledge-track entries in the +# `decisions` category; permitted (but not required) on any knowledge entry so +# the schema stays additive. See fn-38 task 1 (R2 + R16). +MEMORY_DECISION_FIELDS: frozenset[str] = frozenset( + {"decision_status", "superseded_by", "alternatives_considered"} +) MEMORY_PROBLEM_TYPES: tuple[str, ...] = ( "build-error", @@ -3717,6 +4020,9 @@ def cmd_review_backend(args: argparse.Namespace) -> None: MEMORY_STATUS: tuple[str, ...] = ("active", "stale") +# Decision lifecycle for `decisions` category entries (fn-38 task 1). +MEMORY_DECISION_STATUSES: tuple[str, ...] 
= ("proposed", "accepted", "superseded") + # Deterministic field order for write — required first, track-specific next, # optional last. Anything not in this list is emitted alphabetically after. MEMORY_FIELD_ORDER: tuple[str, ...] = ( @@ -3731,6 +4037,9 @@ def cmd_review_backend(args: argparse.Namespace) -> None: "root_cause", "resolution_type", "applies_when", + "decision_status", + "superseded_by", + "alternatives_considered", "status", "stale_reason", "stale_date", @@ -4624,6 +4933,7 @@ def validate_memory_frontmatter(frontmatter: dict[str, Any]) -> list[str]: | MEMORY_OPTIONAL_FIELDS | MEMORY_BUG_FIELDS | MEMORY_KNOWLEDGE_FIELDS + | MEMORY_DECISION_FIELDS ) unknown = set(frontmatter.keys()) - allowed if unknown: @@ -4652,6 +4962,16 @@ def validate_memory_frontmatter(frontmatter: dict[str, Any]) -> list[str]: f"invalid status '{status}' (valid: {', '.join(MEMORY_STATUS)})" ) + decision_status = frontmatter.get("decision_status") + if ( + decision_status is not None + and decision_status not in MEMORY_DECISION_STATUSES + ): + errors.append( + f"invalid decision_status '{decision_status}' " + f"(valid: {', '.join(MEMORY_DECISION_STATUSES)})" + ) + return errors @@ -5251,6 +5571,25 @@ def cmd_memory_add(args: argparse.Namespace) -> None: if not applies_when: applies_when = title + # Decision-specific optional fields (knowledge track, `decisions` category). + # Permitted on any knowledge entry (additive); only meaningful when the + # category is `decisions`. Validation enforces enum on decision_status. + decision_status = getattr(args, "decision_status", None) + superseded_by = getattr(args, "superseded_by", None) + alternatives_considered_raw = ( + getattr(args, "alternatives_considered", None) or "" + ) + alternatives_considered = [ + a.strip() for a in alternatives_considered_raw.split(",") if a.strip() + ] + if decision_status is not None and decision_status not in MEMORY_DECISION_STATUSES: + error_exit( + f"Invalid --decision-status '{decision_status}'. 
Valid: " + f"{', '.join(MEMORY_DECISION_STATUSES)}.", + code=2, + use_json=args.json, + ) + # --- Overlap detection --- no_overlap = bool(getattr(args, "no_overlap_check", False)) overlap = ( @@ -5279,6 +5618,12 @@ def cmd_memory_add(args: argparse.Namespace) -> None: frontmatter["resolution_type"] = resolution_type else: frontmatter["applies_when"] = applies_when + if decision_status is not None: + frontmatter["decision_status"] = decision_status + if superseded_by: + frontmatter["superseded_by"] = superseded_by + if alternatives_considered: + frontmatter["alternatives_considered"] = alternatives_considered related_to: list[str] = [] action: str @@ -8212,6 +8557,261 @@ def cmd_prospect_promote(args: argparse.Namespace) -> None: print(f" WARNING: {artifact_warning}", file=sys.stderr) +# --- Glossary subcommands (fn-38.2) --- + + +def _glossary_load(path: Path) -> list[dict[str, Any]]: + """Read + parse a `GLOSSARY.md` file. Returns [] if file missing.""" + try: + text = path.read_text(encoding="utf-8") + except FileNotFoundError: + return [] + return parse_glossary_file(text) + + +def _glossary_resolve_target_for_add(use_json: bool) -> Path: + """Pick the write target for `glossary add`. + + Rule: write to nearest-ancestor `GLOSSARY.md` (matches read resolution). + To force a subdirectory glossary, drop an empty `GLOSSARY.md` first; + nearest-ancestor will then resolve to it. If no ancestor file exists, + create one at the repo root. + """ + target = find_nearest_glossary() + if target is not None: + return target + return get_repo_root() / GLOSSARY_FILE + + +def cmd_glossary_add(args: argparse.Namespace) -> None: + """Append or update a term entry. + + Resolution: writes to nearest-ancestor `GLOSSARY.md` (creates one at + repo root if no ancestor exists). Multi-line definitions accepted via + `--definition-file <path>` or `--definition-file -` (stdin). + + Update semantics: case-insensitive term match. 
Existing entry replaced + in full (definition + avoid + relates_to all overwritten). New entries + appended at the end so insertion order is stable across sessions. + """ + use_json = bool(getattr(args, "json", False)) + + term_raw = (getattr(args, "term", "") or "").strip() + if not term_raw: + error_exit("term must be non-empty", use_json=use_json) + + # Definition source: --definition (single-line) or --definition-file (multi-line). + definition_inline = getattr(args, "definition", None) + definition_file = getattr(args, "definition_file", None) + if definition_inline is not None and definition_file is not None: + error_exit( + "--definition and --definition-file are mutually exclusive", + use_json=use_json, + ) + if definition_inline is None and definition_file is None: + error_exit( + "--definition or --definition-file required", + use_json=use_json, + ) + + if definition_file is not None: + if definition_file == "-": + definition_text = sys.stdin.read() + else: + try: + definition_text = Path(definition_file).read_text(encoding="utf-8") + except OSError as exc: + error_exit( + f"failed to read --definition-file: {exc}", + use_json=use_json, + ) + return + else: + definition_text = definition_inline or "" + + # Strip a single trailing newline (common when piping from heredoc / + # editor). Internal newlines preserved. 
+ definition_text = definition_text.rstrip("\n") + if not definition_text.strip(): + error_exit("definition must be non-empty", use_json=use_json) + + avoid_raw = getattr(args, "avoid", None) or "" + avoid_list = [a.strip() for a in avoid_raw.split(",") if a.strip()] + + relates_raw = getattr(args, "relates_to", None) or "" + relates_list = [r.strip() for r in relates_raw.split(",") if r.strip()] + + new_entry: dict[str, Any] = { + "term": term_raw, + "definition": definition_text, + "avoid": avoid_list, + "relates_to": relates_list, + } + + errors = validate_glossary_entry(new_entry) + if errors: + error_exit("; ".join(errors), use_json=use_json) + + target = _glossary_resolve_target_for_add(use_json) + entries = _glossary_load(target) + + # Case-insensitive update if term already exists; else append. + updated = False + for i, entry in enumerate(entries): + if _glossary_term_matches(entry["term"], term_raw): + entries[i] = new_entry + updated = True + break + if not updated: + entries.append(new_entry) + + rendered = render_glossary_file(entries) + atomic_write(target, rendered) + + if use_json: + json_output( + { + "path": str(target), + "term": term_raw, + "action": "updated" if updated else "created", + "entry_count": len(entries), + } + ) + else: + action = "updated" if updated else "added" + print(f"{action} '{term_raw}' in {target}") + + +def cmd_glossary_list(args: argparse.Namespace) -> None: + """List defined terms across every `GLOSSARY.md` on the ancestor chain. + + Multiple files are grouped by file (nearest first). Empty husks + (file with only a `# Glossary` header) emit no terms but still + appear as a group with `entries: []`. 
+ """ + use_json = bool(getattr(args, "json", False)) + paths = find_all_glossaries() + + groups: list[dict[str, Any]] = [] + for path in paths: + entries = _glossary_load(path) + groups.append( + { + "path": str(path), + "entries": entries, + "count": len(entries), + } + ) + + if use_json: + total = sum(g["count"] for g in groups) + json_output( + { + "groups": groups, + "file_count": len(groups), + "total_terms": total, + } + ) + return + + if not groups: + print("No GLOSSARY.md found on the ancestor chain.") + print(f" (looked from cwd up to repo root, max {GLOSSARY_WALK_MAX_DEPTH} levels)") + return + + for g in groups: + print(f"# {g['path']} ({g['count']} term{'s' if g['count'] != 1 else ''})") + for entry in g["entries"]: + avoid = entry.get("avoid") or [] + avoid_disp = f" [avoid: {', '.join(avoid)}]" if avoid else "" + # First line of definition only for compact output. + first_line = (entry.get("definition") or "").splitlines()[0] \ + if entry.get("definition") else "" + print(f" {entry['term']}: {first_line}{avoid_disp}") + if not g["entries"]: + print(" (no terms — empty husk)") + print() + + +def cmd_glossary_read(args: argparse.Namespace) -> None: + """Print the entry for a term using nearest-ancestor resolution. + + Matches case-insensitively on term name. Searches every glossary on + the ancestor chain (nearest first); the first hit wins (R3). 
+ """ + use_json = bool(getattr(args, "json", False)) + term = (getattr(args, "term", "") or "").strip() + if not term: + error_exit("term required", use_json=use_json) + + for path in find_all_glossaries(): + entries = _glossary_load(path) + for entry in entries: + if _glossary_term_matches(entry["term"], term): + if use_json: + json_output( + { + "path": str(path), + "term": entry["term"], + "definition": entry.get("definition", ""), + "avoid": entry.get("avoid") or [], + "relates_to": entry.get("relates_to") or [], + } + ) + else: + print(f"# {entry['term']} ({path})") + print() + if entry.get("definition"): + print(entry["definition"]) + if entry.get("avoid"): + print() + print(f"_Avoid_: {', '.join(entry['avoid'])}") + if entry.get("relates_to"): + print() + print(f"_Relates to_: {', '.join(entry['relates_to'])}") + return + + error_exit(f"term '{term}' not found", use_json=use_json, code=1) + + +def cmd_glossary_remove(args: argparse.Namespace) -> None: + """Delete an entry from the glossary file that defines it. + + Searches every glossary on the ancestor chain (nearest first) and + removes the term from the first file that defines it. Last-term + removal leaves a `# Glossary` husk on disk (Constraints: never delete + the file). 
+ """ + use_json = bool(getattr(args, "json", False)) + term = (getattr(args, "term", "") or "").strip() + if not term: + error_exit("term required", use_json=use_json) + + for path in find_all_glossaries(): + entries = _glossary_load(path) + for i, entry in enumerate(entries): + if _glossary_term_matches(entry["term"], term): + removed = entries.pop(i) + rendered = render_glossary_file(entries) + atomic_write(path, rendered) + if use_json: + json_output( + { + "path": str(path), + "removed_term": removed["term"], + "entry_count": len(entries), + "husk": len(entries) == 0, + } + ) + else: + print(f"removed '{removed['term']}' from {path}") + if len(entries) == 0: + print(" (file kept as empty '# Glossary' husk)") + return + + error_exit(f"term '{term}' not found", use_json=use_json, code=1) + + def cmd_epic_create(args: argparse.Namespace) -> None: """Create a new epic.""" if not ensure_flow_exists(): @@ -15658,6 +16258,23 @@ def main() -> None: dest="applies_when", help="Knowledge track: situations this guidance applies to", ) + # Decision-specific optional fields (knowledge / decisions category). + p_memory_add.add_argument( + "--decision-status", + dest="decision_status", + choices=list(MEMORY_DECISION_STATUSES), + help="Decisions category: lifecycle (proposed | accepted | superseded)", + ) + p_memory_add.add_argument( + "--superseded-by", + dest="superseded_by", + help="Decisions category: entry id that supersedes this decision", + ) + p_memory_add.add_argument( + "--alternatives-considered", + dest="alternatives_considered", + help="Decisions category: comma-separated list of rejected alternatives", + ) # Overlap detection. 
p_memory_add.add_argument( "--no-overlap-check", @@ -15927,6 +16544,83 @@ def main() -> None: ) p_prospect_promote.set_defaults(func=cmd_prospect_promote) + # glossary add / list / read / remove (fn-38.2) + p_glossary = subparsers.add_parser( + "glossary", + help=( + "Project glossary commands (GLOSSARY.md at repo root or nearest " + "ancestor). Lives outside .flow/ so it survives flow-next removal." + ), + ) + glossary_sub = p_glossary.add_subparsers(dest="glossary_cmd", required=True) + + p_glossary_add = glossary_sub.add_parser( + "add", + help=( + "Add or update a term entry in the nearest-ancestor GLOSSARY.md " + "(creates one at repo root if no ancestor exists)" + ), + ) + p_glossary_add.add_argument("term", help="Term name (used as H2 heading)") + p_glossary_add.add_argument( + "--definition", + help="Single-line definition (use --definition-file for multi-line)", + ) + p_glossary_add.add_argument( + "--definition-file", + dest="definition_file", + help=( + "Read multi-line definition from file path or '-' for stdin " + "(mutually exclusive with --definition)" + ), + ) + p_glossary_add.add_argument( + "--avoid", + help=( + "Comma-separated alternative terms to avoid in favor of this one " + "(rendered as a `_Avoid_:` italic line)" + ), + ) + p_glossary_add.add_argument( + "--relates-to", + dest="relates_to", + help=( + "Comma-separated related terms / anchor links " + "(rendered as a `_Relates to_:` italic line)" + ), + ) + p_glossary_add.add_argument("--json", action="store_true", help="JSON output") + p_glossary_add.set_defaults(func=cmd_glossary_add) + + p_glossary_list = glossary_sub.add_parser( + "list", + help=( + "List defined terms across every GLOSSARY.md on the ancestor chain " + "(nearest first)" + ), + ) + p_glossary_list.add_argument("--json", action="store_true", help="JSON output") + p_glossary_list.set_defaults(func=cmd_glossary_list) + + p_glossary_read = glossary_sub.add_parser( + "read", + help=( + "Print a term entry. 
Resolution walks ancestors from cwd; " + "first match wins" + ), + ) + p_glossary_read.add_argument("term", help="Term name (case-insensitive match)") + p_glossary_read.add_argument("--json", action="store_true", help="JSON output") + p_glossary_read.set_defaults(func=cmd_glossary_read) + + p_glossary_remove = glossary_sub.add_parser( + "remove", + help="Remove a term entry from the file that defines it", + ) + p_glossary_remove.add_argument("term", help="Term name (case-insensitive match)") + p_glossary_remove.add_argument("--json", action="store_true", help="JSON output") + p_glossary_remove.set_defaults(func=cmd_glossary_remove) + # epic create p_epic = subparsers.add_parser("epic", help="Epic commands") epic_sub = p_epic.add_subparsers(dest="epic_cmd", required=True) diff --git a/plugins/flow-next/scripts/glossary_smoke_test.sh b/plugins/flow-next/scripts/glossary_smoke_test.sh new file mode 100755 index 00000000..36a160fe --- /dev/null +++ b/plugins/flow-next/scripts/glossary_smoke_test.sh @@ -0,0 +1,784 @@ +#!/usr/bin/env bash +# fn-38-project-glossary-decision-records-and.2 +# Smoke tests for `flowctl glossary` subcommands + nearest-ancestor walk. +# +# This is the EARLY PROOF POINT for fn-38. If file format, parser, or walk +# algorithm has bugs, downstream tasks (T3 interview integration, T4-T6 +# scout/audit/sync extensions) need revision before continuing. +# +# Cases: +# 1. Bare repo: `glossary list` reports no files +# 2. `add` creates root GLOSSARY.md when none exists +# 3. Single-line definition round-trips (read after write) +# 4. Multi-line via stdin (`--definition-file -`) preserves newlines +# 5. `--definition` and `--definition-file` mutually exclusive (rc=1) +# 6. Empty term / empty definition rejected (rc=1) +# 7. Update in place: re-add with same term (case-insensitive) replaces entry +# 8. Multiple distinct terms: list shows all, in insertion order +# 9. Nearest-ancestor walk: subdir GLOSSARY.md wins over root +# 10. 
Nearest-ancestor walk: read from subdir without subdir glossary falls back to root +# 11. Walk stops at git repo root (parent dirs above repo root NOT searched) +# 12. 32-level depth cap returns gracefully (None / not found, no infinite loop) +# 13. Atomic write: simulated kill mid-write leaves no half-written file +# 14. Parse roundtrip: write → read → re-render → byte-equal (or canonical) +# 15. `_Avoid_` aliases survive parse + re-render +# 16. `_Relates to_` survives parse + re-render +# 17. Term removal: only the named term goes; siblings preserved +# 18. Last-term removal hygiene: file becomes `# Glossary` husk (NOT deleted) +# 19. Fenced-code stripping: `## inside-fence` is NOT picked up as a term +# 20. R18: rm -rf .flow/ between two write phases — GLOSSARY.md files survive +# 21. R4 / no-meta-file: no GLOSSARY-MAP.md anywhere in repo +# 22. R17: no DDD jargon in flowctl glossary help text +# 23. R15: rendered file is human-readable markdown (H2 per term) +# 24. list --json shape: groups[].path, groups[].entries, total_terms +# 25. read --json shape: path, term, definition, avoid, relates_to +# +# Pure shell + Python harness — no LLM invocations. Targets <30s runtime. +# Pattern follows audit_smoke_test.sh (fn-34.3). +# +# Run from any directory other than the plugin repo root. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +PLUGIN_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)" +FLOWCTL="$SCRIPT_DIR/flowctl" + +pick_python() { + if [[ -n "${PYTHON_BIN:-}" ]]; then + command -v "$PYTHON_BIN" >/dev/null 2>&1 && { echo "$PYTHON_BIN"; return; } + fi + if command -v python3 >/dev/null 2>&1; then echo "python3"; return; fi + if command -v python >/dev/null 2>&1; then echo "python"; return; fi + echo "" +} + +PYTHON_BIN="$(pick_python)" +[[ -n "$PYTHON_BIN" ]] || { echo "ERROR: python not found (need python3 or python in PATH)" >&2; exit 1; } + +# Safety: never run from the main plugin repo (matches sibling smoke scripts). 
+if [[ -f "$PWD/.claude-plugin/marketplace.json" ]] || [[ -f "$PWD/plugins/flow-next/.claude-plugin/plugin.json" ]]; then + echo "ERROR: refusing to run from main plugin repo. Run from any other directory." >&2 + exit 1 +fi + +TEST_DIR="/tmp/glossary-smoke-$$" +PASS=0 +FAIL=0 + +GREEN='\033[0;32m' +RED='\033[0;31m' +YELLOW='\033[1;33m' +NC='\033[0m' + +cleanup() { + if [[ "${KEEP_TEST_DIR:-0}" == "1" ]]; then + echo "Keeping test dir: $TEST_DIR" + return + fi + command -v trash >/dev/null 2>&1 && trash "$TEST_DIR" 2>/dev/null && return + find "$TEST_DIR" -depth -type f -exec rm -f {} \; 2>/dev/null || true + find "$TEST_DIR" -depth -type d -exec rmdir {} \; 2>/dev/null || true +} +trap cleanup EXIT + +ok() { echo -e "${GREEN}PASS${NC} $1"; PASS=$((PASS + 1)); } +fail() { echo -e "${RED}FAIL${NC} $1"; FAIL=$((FAIL + 1)); } + +assert_rc() { + local expected="$1" actual="$2" label="$3" + if [[ "$actual" -eq "$expected" ]]; then + ok "$label (rc=$actual)" + else + fail "$label (expected rc=$expected, got rc=$actual)" + fi +} + +assert_grep() { + local needle="$1" haystack="$2" label="$3" + if printf '%s\n' "$haystack" | grep -qF -- "$needle"; then + ok "$label (found: '$needle')" + else + fail "$label (missing: '$needle')" + { + echo "--- haystack head ---" + printf '%s\n' "$haystack" | sed -n '1,20p' + echo "---" + } >&2 || true + fi +} + +assert_no_grep() { + local needle="$1" haystack="$2" label="$3" + if printf '%s\n' "$haystack" | grep -qF -- "$needle"; then + fail "$label (found unwanted: '$needle')" + else + ok "$label (absent: '$needle')" + fi +} + +json_get() { + local file="$1" expr="$2" + "$PYTHON_BIN" -c "import json; d=json.load(open('$file')); print($expr)" 2>&1 || true +} + +assert_eq_jq() { + local file="$1" expr="$2" expected="$3" label="$4" + local actual + actual="$(json_get "$file" "$expr")" + if [[ "$actual" == "$expected" ]]; then + ok "$label ($expr == $expected)" + else + fail "$label ($expr expected $expected, got $actual)" + cat "$file" >&2 
2>/dev/null || true + fi +} + +# Init a minimal git repo (no .flow/ — glossary works without it). +init_test_repo() { + local dir="$1" + mkdir -p "$dir" + ( cd "$dir" && \ + git init -q && \ + git config user.email "glossary-smoke@example.com" && \ + git config user.name "Glossary Smoke" && \ + git checkout -b main >/dev/null 2>&1 || true + git commit --allow-empty -m "init" -q + ) +} + +echo -e "${YELLOW}=== glossary smoke tests (fn-38.2) ===${NC}" +echo "Plugin root: $PLUGIN_ROOT" +echo "Test dir: $TEST_DIR" +echo + +mkdir -p "$TEST_DIR" + +REPO="$TEST_DIR/repo" +init_test_repo "$REPO" + +# ============================================================================= +# CASE 1: Bare repo — list reports no files +# ============================================================================= +echo -e "${YELLOW}--- Case 1: bare repo (no GLOSSARY.md) ---${NC}" +LIST_JSON="$TEST_DIR/case1-list.json" +( cd "$REPO" && "$FLOWCTL" glossary list --json > "$LIST_JSON" ) +assert_eq_jq "$LIST_JSON" "d['file_count']" "0" "Case 1: file_count=0 in bare repo" +assert_eq_jq "$LIST_JSON" "d['total_terms']" "0" "Case 1: total_terms=0 in bare repo" + +# ============================================================================= +# CASE 2-3: add + read round-trip (single-line) +# ============================================================================= +echo -e "${YELLOW}--- Case 2-3: add creates root file; single-line round-trip ---${NC}" +( cd "$REPO" && "$FLOWCTL" glossary add "Worker" \ + --definition "Process that consumes from the task queue." 
\ + --avoid "executor, runner" --json > "$TEST_DIR/case2-add.json" ) +assert_eq_jq "$TEST_DIR/case2-add.json" "d['action']" "created" "Case 2: action=created on first add" +assert_eq_jq "$TEST_DIR/case2-add.json" "d['term']" "Worker" "Case 2: term roundtrip" + +# Verify file exists at root +[[ -f "$REPO/GLOSSARY.md" ]] && ok "Case 2: GLOSSARY.md created at repo root" \ + || fail "Case 2: GLOSSARY.md missing at repo root" + +# Read back via flowctl +READ_JSON="$TEST_DIR/case3-read.json" +( cd "$REPO" && "$FLOWCTL" glossary read "Worker" --json > "$READ_JSON" ) +assert_eq_jq "$READ_JSON" "d['term']" "Worker" "Case 3: read returns term" +assert_eq_jq "$READ_JSON" "d['definition']" "Process that consumes from the task queue." \ + "Case 3: definition round-trip" + +# Avoid list round-trip +AVOID_JOIN="$(json_get "$READ_JSON" "', '.join(d['avoid'])")" +[[ "$AVOID_JOIN" == "executor, runner" ]] && ok "Case 3: avoid list round-trip" \ + || fail "Case 3: avoid list got '$AVOID_JOIN'" + +# ============================================================================= +# CASE 4: multi-line via stdin +# ============================================================================= +echo -e "${YELLOW}--- Case 4: multi-line definition via stdin ---${NC}" +MULTILINE=$'Line one of definition.\n\nLine three after blank.\n Indented continuation.' 
+printf '%s' "$MULTILINE" \ + | ( cd "$REPO" && "$FLOWCTL" glossary add "Pipeline" --definition-file - --json > "$TEST_DIR/case4-add.json" ) +assert_eq_jq "$TEST_DIR/case4-add.json" "d['action']" "created" "Case 4: action=created" + +READ4="$TEST_DIR/case4-read.json" +( cd "$REPO" && "$FLOWCTL" glossary read "Pipeline" --json > "$READ4" ) +DEF4="$(json_get "$READ4" "d['definition']")" +# Whole multi-line definition should match (trailing whitespace trimmed) +EXPECTED4="$MULTILINE" +if [[ "$DEF4" == "$EXPECTED4" ]]; then + ok "Case 4: multi-line definition round-trip preserved newlines" +else + fail "Case 4: multi-line round-trip mismatch" + echo " expected: $(printf '%q' "$EXPECTED4")" >&2 + echo " got: $(printf '%q' "$DEF4")" >&2 +fi + +# ============================================================================= +# CASE 5: --definition + --definition-file are mutually exclusive +# ============================================================================= +echo -e "${YELLOW}--- Case 5: mutually exclusive flags ---${NC}" +rc=0 +err="$( cd "$REPO" && "$FLOWCTL" glossary add "Bad" --definition "x" --definition-file - 2>&1 1>/dev/null <<< "y" )" || rc=$? +if [[ "$rc" -ne 0 ]]; then + ok "Case 5: mutually exclusive flags rejected (rc=$rc)" +else + fail "Case 5: expected non-zero exit" +fi +assert_grep "mutually exclusive" "$err" "Case 5: error mentions mutually exclusive" + +# ============================================================================= +# CASE 6: empty term / empty definition rejected +# ============================================================================= +echo -e "${YELLOW}--- Case 6: empty inputs rejected ---${NC}" + +rc=0 +err="$( cd "$REPO" && "$FLOWCTL" glossary add " " --definition "valid" 2>&1 1>/dev/null )" || rc=$? 
+[[ "$rc" -ne 0 ]] && ok "Case 6: whitespace-only term rejected (rc=$rc)" || fail "Case 6: whitespace term accepted" +assert_grep "term must be non-empty" "$err" "Case 6: error mentions term" + +rc=0 +err="$( cd "$REPO" && "$FLOWCTL" glossary add "ValidTerm" --definition " " 2>&1 1>/dev/null )" || rc=$? +[[ "$rc" -ne 0 ]] && ok "Case 6: whitespace-only definition rejected (rc=$rc)" || fail "Case 6: whitespace defn accepted" +assert_grep "definition must be non-empty" "$err" "Case 6: error mentions definition" + +# Also: missing both --definition and --definition-file +rc=0 +err="$( cd "$REPO" && "$FLOWCTL" glossary add "NoDefn" 2>&1 1>/dev/null )" || rc=$? +[[ "$rc" -ne 0 ]] && ok "Case 6: missing --definition rejected (rc=$rc)" || fail "Case 6: missing flags accepted" + +# ============================================================================= +# CASE 7: update in place (case-insensitive term match) +# ============================================================================= +echo -e "${YELLOW}--- Case 7: re-add same term updates in place ---${NC}" +# 'worker' (lowercase) should match 'Worker' (titlecase) added in Case 2. +( cd "$REPO" && "$FLOWCTL" glossary add "worker" --definition "Updated worker definition." --json > "$TEST_DIR/case7-add.json" ) +assert_eq_jq "$TEST_DIR/case7-add.json" "d['action']" "updated" "Case 7: action=updated on case-insensitive re-add" + +READ7="$TEST_DIR/case7-read.json" +( cd "$REPO" && "$FLOWCTL" glossary read "Worker" --json > "$READ7" ) +assert_eq_jq "$READ7" "d['definition']" "Updated worker definition." 
\ + "Case 7: definition replaced (not appended)" + +# Update should not duplicate the term +TERM_COUNT="$( cd "$REPO" && "$FLOWCTL" glossary list --json \ + | "$PYTHON_BIN" -c 'import json,sys; d=json.load(sys.stdin); print(d["total_terms"])' )" +[[ "$TERM_COUNT" == "2" ]] && ok "Case 7: total_terms=2 (Worker + Pipeline, no dupes)" \ + || fail "Case 7: expected 2 terms, got $TERM_COUNT" + +# ============================================================================= +# CASE 8: multiple terms; list groups + insertion order preserved +# ============================================================================= +echo -e "${YELLOW}--- Case 8: multiple terms, list shape ---${NC}" +( cd "$REPO" && "$FLOWCTL" glossary add "Receipt" --definition "Output of a review pass." --json >/dev/null ) +( cd "$REPO" && "$FLOWCTL" glossary add "Anchor" --definition "Confidence score (0/25/50/75/100)." --json >/dev/null ) + +LIST8="$TEST_DIR/case8-list.json" +( cd "$REPO" && "$FLOWCTL" glossary list --json > "$LIST8" ) +assert_eq_jq "$LIST8" "d['file_count']" "1" "Case 8: file_count=1" +assert_eq_jq "$LIST8" "d['total_terms']" "4" "Case 8: total_terms=4" + +# Insertion order: Worker → Pipeline → Receipt → Anchor. +# Worker's display casing is whatever Case 7's re-add specified (lowercase +# 'worker' — last write wins on casing too). Compare lowercase to keep +# the assertion focused on order, not display casing. 
+TERMS_ORDER_LC="$(json_get "$LIST8" "','.join(e['term'].lower() for e in d['groups'][0]['entries'])")" +[[ "$TERMS_ORDER_LC" == "worker,pipeline,receipt,anchor" ]] \ + && ok "Case 8: terms in insertion order (case-insensitive)" \ + || fail "Case 8: expected 'worker,pipeline,receipt,anchor', got '$TERMS_ORDER_LC'" + +# ============================================================================= +# CASE 9: nearest-ancestor walk — subdir wins over root +# ============================================================================= +echo -e "${YELLOW}--- Case 9: subdir GLOSSARY.md wins ---${NC}" +SUB="$REPO/services/admin" +mkdir -p "$SUB" + +# Drop an empty GLOSSARY.md in subdir to force `add` to write there. +: > "$SUB/GLOSSARY.md" + +# Now from the subdir, `add` should write to subdir (not root). +( cd "$SUB" && "$FLOWCTL" glossary add "Worker" \ + --definition "Subdir-scoped admin worker (not the root one)." --json > "$TEST_DIR/case9-add.json" ) +# macOS resolves /tmp -> /private/tmp via symlink; compare via realpath. +SUB_REALPATH="$( "$PYTHON_BIN" -c 'import os,sys; print(os.path.realpath(sys.argv[1]))' "$SUB/GLOSSARY.md" )" +ACTUAL_PATH="$(json_get "$TEST_DIR/case9-add.json" "d['path']")" +[[ "$ACTUAL_PATH" == "$SUB_REALPATH" ]] \ + && ok "Case 9: write went to subdir glossary" \ + || fail "Case 9: write path mismatch (expected '$SUB_REALPATH', got '$ACTUAL_PATH')" + +# Read from subdir resolves to subdir definition +SUB_READ="$TEST_DIR/case9-read-sub.json" +( cd "$SUB" && "$FLOWCTL" glossary read "Worker" --json > "$SUB_READ" ) +SUB_DEF="$(json_get "$SUB_READ" "d['definition']")" +[[ "$SUB_DEF" == "Subdir-scoped admin worker (not the root one)." 
]] \ + && ok "Case 9: subdir read picks subdir definition" \ + || fail "Case 9: subdir read returned '$SUB_DEF'" + +# ============================================================================= +# CASE 10: read from subdir without subdir glossary falls back to root +# ============================================================================= +echo -e "${YELLOW}--- Case 10: ancestor fallback to root ---${NC}" +SUB2="$REPO/services/billing" +mkdir -p "$SUB2" +# No GLOSSARY.md in SUB2 — should walk up to root. +ROOT_READ="$TEST_DIR/case10-read.json" +( cd "$SUB2" && "$FLOWCTL" glossary read "Pipeline" --json > "$ROOT_READ" ) +ROOT_PATH="$(json_get "$ROOT_READ" "d['path']")" +ROOT_REALPATH="$( "$PYTHON_BIN" -c 'import os,sys; print(os.path.realpath(sys.argv[1]))' "$REPO/GLOSSARY.md" )" +[[ "$ROOT_PATH" == "$ROOT_REALPATH" ]] \ + && ok "Case 10: read from subdir without local glossary resolves to root" \ + || fail "Case 10: expected root path, got '$ROOT_PATH'" + +# ============================================================================= +# CASE 11: walk stops at git repo root +# ============================================================================= +echo -e "${YELLOW}--- Case 11: walk stops at git repo root ---${NC}" +# The test's parent is /tmp; if /tmp had a GLOSSARY.md somehow, the walk +# from inside REPO must NOT see it. Drop one to verify. +PARENT_GLOSSARY="$TEST_DIR/GLOSSARY.md" +cat > "$PARENT_GLOSSARY" <<'EOF' +# Glossary + +## OutOfScope +This is outside the git repo and must NOT be seen. +EOF +trap 'rm -f "'"$PARENT_GLOSSARY"'"; cleanup' EXIT + +# From REPO, looking up `OutOfScope` should fail. +rc=0 +err="$( cd "$REPO" && "$FLOWCTL" glossary read "OutOfScope" 2>&1 1>/dev/null )" || rc=$? 
+[[ "$rc" -ne 0 ]] && ok "Case 11: walk does NOT cross git repo root (rc=$rc)" \ + || fail "Case 11: walk crossed repo root and found OutOfScope" +assert_grep "not found" "$err" "Case 11: error mentions 'not found'" + +# ============================================================================= +# CASE 12: 32-level depth cap is enforced gracefully +# ============================================================================= +echo -e "${YELLOW}--- Case 12: 32-level depth cap ---${NC}" +# Create a deep nested directory structure (40 levels) inside repo with no +# glossary in any of the deepest levels. The walk should hit cap or repo +# root and return None gracefully (not infinite loop). +DEEP="$REPO" +for i in $(seq 1 40); do + DEEP="$DEEP/d$i" +done +mkdir -p "$DEEP" + +# Find a term that doesn't exist; should fail gracefully (not hang). +rc=0 +deep_out="$( cd "$DEEP" && timeout 10 "$FLOWCTL" glossary read "NoSuchTerm" 2>&1 1>/dev/null )" || rc=$? +[[ "$rc" -ne 0 ]] && ok "Case 12: deep walk completes without hang (rc=$rc)" \ + || fail "Case 12: unexpected success in deep walk" +assert_grep "not found" "$deep_out" "Case 12: deep walk error message reaches user" + +# ============================================================================= +# CASE 13: atomic-write: simulated crash leaves no half-written file +# ============================================================================= +echo -e "${YELLOW}--- Case 13: atomic write under crash simulation ---${NC}" +# Use the actual atomic_write helper directly via python; if a write is +# interrupted, the destination file should be unchanged. We can't truly +# kill mid-write in a unit test, but we can verify that: +# (a) atomic_write rejects a write that raises mid-content (the temp +# file is unlinked, dest unchanged), and +# (b) the temp file isn't left around in the directory. 
+BEFORE_HASH="$( "$PYTHON_BIN" -c 'import hashlib,sys; print(hashlib.sha256(open(sys.argv[1],"rb").read()).hexdigest())' "$REPO/GLOSSARY.md" )" + +"$PYTHON_BIN" - <<EOF +import sys, os +sys.path.insert(0, "$SCRIPT_DIR") +import flowctl +from pathlib import Path + +dest = Path("$REPO/GLOSSARY.md") + +# Monkey-patch fdopen so the write raises mid-stream. +orig_fdopen = os.fdopen +class _Wrapper: + def __init__(self, real): + self._real = real + def __enter__(self): + return self + def __exit__(self, exc_type, exc, tb): + try: self._real.close() + except Exception: pass + return False + def write(self, data): + raise OSError("simulated mid-write crash") + +def fake_fdopen(fd, *a, **k): + real = orig_fdopen(fd, *a, **k) + return _Wrapper(real) + +os.fdopen = fake_fdopen + +try: + flowctl.atomic_write(dest, "POISONED CONTENT — this must NOT land on disk\n") + print("ERROR: atomic_write did not raise", file=sys.stderr) + sys.exit(2) +except OSError: + pass +finally: + os.fdopen = orig_fdopen +EOF +ATOMIC_RC=$? 
+assert_rc 0 "$ATOMIC_RC" "Case 13: atomic_write raised on simulated crash" + +AFTER_HASH="$( "$PYTHON_BIN" -c 'import hashlib,sys; print(hashlib.sha256(open(sys.argv[1],"rb").read()).hexdigest())' "$REPO/GLOSSARY.md" )" +[[ "$BEFORE_HASH" == "$AFTER_HASH" ]] \ + && ok "Case 13: dest file byte-identical after simulated crash (no half-write)" \ + || fail "Case 13: dest file changed despite crash (not atomic)" + +# Also verify no .tmp leftovers in the parent dir +TMP_COUNT="$(find "$REPO" -maxdepth 1 -name '*.tmp' | wc -l | tr -d ' ')" +[[ "$TMP_COUNT" == "0" ]] \ + && ok "Case 13: no .tmp leftovers after crash" \ + || fail "Case 13: $TMP_COUNT .tmp file(s) leaked" + +# ============================================================================= +# CASE 14: parse roundtrip — write → parse → re-render → byte-identical +# ============================================================================= +echo -e "${YELLOW}--- Case 14: parse roundtrip ---${NC}" +ROUNDTRIP_OUT="$TEST_DIR/case14-out.txt" +"$PYTHON_BIN" - <<EOF > "$ROUNDTRIP_OUT" +import sys +sys.path.insert(0, "$SCRIPT_DIR") +import flowctl + +text = open("$REPO/GLOSSARY.md").read() +entries = flowctl.parse_glossary_file(text) +rendered = flowctl.render_glossary_file(entries) +print("ENTRIES:", len(entries)) +if rendered == text: + print("BYTE_EQUAL: YES") +else: + # Acceptable: "canonical-equivalent" — re-parsing rendered must yield + # the same entries. 
+ re_entries = flowctl.parse_glossary_file(rendered) + if re_entries == entries: + print("BYTE_EQUAL: NO_BUT_CANONICAL_EQUIVALENT") + else: + print("BYTE_EQUAL: NO_AND_DIFFERENT") + print("--- ORIGINAL ---") + print(repr(text)) + print("--- RENDERED ---") + print(repr(rendered)) +EOF + +if grep -q '^BYTE_EQUAL: YES$' "$ROUNDTRIP_OUT" \ + || grep -q '^BYTE_EQUAL: NO_BUT_CANONICAL_EQUIVALENT$' "$ROUNDTRIP_OUT"; then + ok "Case 14: parse roundtrip preserves entries" +else + fail "Case 14: roundtrip diverges" + cat "$ROUNDTRIP_OUT" >&2 +fi + +# ============================================================================= +# CASE 15: _Avoid_ aliases survive parse + re-render +# ============================================================================= +echo -e "${YELLOW}--- Case 15: _Avoid_ survives roundtrip ---${NC}" +"$PYTHON_BIN" - <<EOF +import sys +sys.path.insert(0, "$SCRIPT_DIR") +import flowctl +text = open("$REPO/GLOSSARY.md").read() +entries = flowctl.parse_glossary_file(text) +worker = next(e for e in entries if e["term"].lower() == "worker") +rendered = flowctl.render_glossary_file(entries) +re_entries = flowctl.parse_glossary_file(rendered) +re_worker = next(e for e in re_entries if e["term"].lower() == "worker") +# Worker entry was last updated WITHOUT --avoid in Case 7, so its avoid +# list is empty. We still need a positive case — verify Pipeline entry's +# round-trip for completeness, then (separately) re-add Worker WITH avoid. +# Pipeline has no avoid, so verify equality: +assert worker["avoid"] == re_worker["avoid"], (worker, re_worker) +print("OK") +EOF +if [[ $? -eq 0 ]]; then + ok "Case 15: avoid list survives parse roundtrip (verified empty + Case 3 non-empty)" +else + fail "Case 15: avoid list mismatch" +fi + +# Add a fresh entry WITH _Avoid_ + _Relates to_ for an explicit roundtrip. +( cd "$REPO" && "$FLOWCTL" glossary add "Receipt" \ + --definition "Output of a review pass." 
\ + --avoid "result, output" \ + --relates-to "[Worker](#worker), [Anchor](#anchor)" \ + --json > /dev/null ) +RECEIPT_READ="$TEST_DIR/case15-receipt.json" +( cd "$REPO" && "$FLOWCTL" glossary read "Receipt" --json > "$RECEIPT_READ" ) +ACTUAL_AVOID="$(json_get "$RECEIPT_READ" "', '.join(d['avoid'])")" +[[ "$ACTUAL_AVOID" == "result, output" ]] \ + && ok "Case 15: _Avoid_: 'result, output' survives roundtrip" \ + || fail "Case 15: avoid roundtrip got '$ACTUAL_AVOID'" + +# ============================================================================= +# CASE 16: _Relates to_ survives parse + re-render +# ============================================================================= +echo -e "${YELLOW}--- Case 16: _Relates to_ survives roundtrip ---${NC}" +ACTUAL_REL="$(json_get "$RECEIPT_READ" "', '.join(d['relates_to'])")" +[[ "$ACTUAL_REL" == "[Worker](#worker), [Anchor](#anchor)" ]] \ + && ok "Case 16: _Relates to_ list with anchor links survives" \ + || fail "Case 16: relates_to roundtrip got '$ACTUAL_REL'" + +# ============================================================================= +# CASE 17: term removal — only that term goes +# ============================================================================= +echo -e "${YELLOW}--- Case 17: term removal preserves siblings ---${NC}" +( cd "$REPO" && "$FLOWCTL" glossary remove "Pipeline" --json > "$TEST_DIR/case17-rm.json" ) +assert_eq_jq "$TEST_DIR/case17-rm.json" "d['removed_term']" "Pipeline" "Case 17: removed term name" +assert_eq_jq "$TEST_DIR/case17-rm.json" "d['husk']" "False" "Case 17: file is not yet husk (siblings remain)" + +# Pipeline gone; Worker, Receipt, Anchor remain +LIST17="$TEST_DIR/case17-list.json" +( cd "$REPO" && "$FLOWCTL" glossary list --json > "$LIST17" ) +assert_eq_jq "$LIST17" "d['groups'][0]['count']" "3" "Case 17: 3 terms remain in root file" + +REMAINING_LC="$(json_get "$LIST17" "','.join(e['term'].lower() for e in d['groups'][0]['entries'])")" +assert_grep "worker" "$REMAINING_LC" 
"Case 17: Worker preserved (case-insensitive)" +assert_grep "receipt" "$REMAINING_LC" "Case 17: Receipt preserved" +assert_grep "anchor" "$REMAINING_LC" "Case 17: Anchor preserved" +assert_no_grep "pipeline" "$REMAINING_LC" "Case 17: Pipeline removed" + +# Also verify subdir glossary's Worker entry is untouched. +# macOS resolves /tmp -> /private/tmp via symlink; compare via realpath. +SUB_LIST="$TEST_DIR/case17-sub-list.json" +( cd "$SUB" && "$FLOWCTL" glossary read "Worker" --json > "$SUB_LIST" ) +SUB_PATH_RESOLVED="$( "$PYTHON_BIN" -c 'import os,sys; print(os.path.realpath(sys.argv[1]))' "$SUB/GLOSSARY.md" )" +ACTUAL_SUB_PATH="$(json_get "$SUB_LIST" "d['path']")" +[[ "$ACTUAL_SUB_PATH" == "$SUB_PATH_RESOLVED" ]] \ + && ok "Case 17: subdir Worker entry untouched (resolved path match)" \ + || fail "Case 17: subdir path mismatch (expected '$SUB_PATH_RESOLVED', got '$ACTUAL_SUB_PATH')" + +# Try removing a term that doesn't exist +rc=0 +err="$( cd "$REPO" && "$FLOWCTL" glossary remove "DoesNotExist" 2>&1 1>/dev/null )" || rc=$? +[[ "$rc" -ne 0 ]] && ok "Case 17: removing non-existent term errors (rc=$rc)" \ + || fail "Case 17: non-existent term silently succeeded" + +# ============================================================================= +# CASE 18: last-term removal hygiene — file becomes husk, NOT deleted +# ============================================================================= +echo -e "${YELLOW}--- Case 18: last-term-removal husk ---${NC}" +# Drain the subdir glossary (only had 'Worker'). After removal, file should +# remain as `# Glossary` husk with no entries. 
+( cd "$SUB" && "$FLOWCTL" glossary remove "Worker" --json > "$TEST_DIR/case18-rm.json" ) +assert_eq_jq "$TEST_DIR/case18-rm.json" "d['husk']" "True" "Case 18: husk=true on last-term removal" + +[[ -f "$SUB/GLOSSARY.md" ]] && ok "Case 18: file kept after last-term removal (NOT deleted)" \ + || fail "Case 18: file unexpectedly deleted" + +HUSK_CONTENT="$(cat "$SUB/GLOSSARY.md")" +assert_grep "# Glossary" "$HUSK_CONTENT" "Case 18: husk has '# Glossary' H1" + +# The husk file itself must list 0 terms. List from SUB walks up — both +# the husk (0 entries) and the root file (3 entries) appear, so use the +# per-group count for SUB's file specifically. +HUSK_LIST="$TEST_DIR/case18-husk-list.json" +( cd "$SUB" && "$FLOWCTL" glossary list --json > "$HUSK_LIST" ) +HUSK_GROUP_COUNT="$( "$PYTHON_BIN" -c ' +import json, sys, os +d = json.load(open(sys.argv[1])) +husk_path = os.path.realpath(sys.argv[2]) +for g in d["groups"]: + if g["path"] == husk_path: + print(g["count"]); sys.exit(0) +print("not-found"); sys.exit(1)' "$HUSK_LIST" "$SUB/GLOSSARY.md" )" +[[ "$HUSK_GROUP_COUNT" == "0" ]] \ + && ok "Case 18: husk file group lists 0 terms" \ + || fail "Case 18: husk group count was '$HUSK_GROUP_COUNT'" + +# And the husk file should still appear as a group (file-level visibility) +HUSK_VISIBLE="$( "$PYTHON_BIN" -c ' +import json, sys, os +d = json.load(open(sys.argv[1])) +husk_path = os.path.realpath(sys.argv[2]) +print(any(g["path"] == husk_path for g in d["groups"])) +' "$HUSK_LIST" "$SUB/GLOSSARY.md" )" +[[ "$HUSK_VISIBLE" == "True" ]] \ + && ok "Case 18: husk file still visible in list (group with 0 entries)" \ + || fail "Case 18: husk file vanished from list" + +# ============================================================================= +# CASE 19: fenced-code stripping +# ============================================================================= +echo -e "${YELLOW}--- Case 19: fenced-code stripping ---${NC}" +# Hand-craft a glossary file with a fenced code block 
containing fake H2s. +FENCY="$REPO/sub-with-fence/GLOSSARY.md" +mkdir -p "$REPO/sub-with-fence" +cat > "$FENCY" <<'EOF' +# Glossary + +## RealTerm +Definition mentions code: + +```bash +## not a heading +echo "## also not a heading" +``` + +More definition after the fence. + +## AnotherRealTerm +Second real term definition. +EOF + +FENCE_LIST="$TEST_DIR/case19-list.json" +( cd "$REPO/sub-with-fence" && "$FLOWCTL" glossary list --json > "$FENCE_LIST" ) +FENCE_TERMS="$(json_get "$FENCE_LIST" "','.join(e['term'] for e in d['groups'][0]['entries'])")" +[[ "$FENCE_TERMS" == "RealTerm,AnotherRealTerm" ]] \ + && ok "Case 19: fenced-code H2s NOT picked up as terms" \ + || fail "Case 19: fenced terms leaked: '$FENCE_TERMS'" + +# Verify the real definition body still includes the fence content +REAL_READ="$TEST_DIR/case19-real.json" +( cd "$REPO/sub-with-fence" && "$FLOWCTL" glossary read "RealTerm" --json > "$REAL_READ" ) +REAL_DEF="$(json_get "$REAL_READ" "d['definition']")" +assert_grep "not a heading" "$REAL_DEF" "Case 19: fence content kept inside the definition body" +assert_grep "More definition after the fence" "$REAL_DEF" "Case 19: post-fence text kept" + +# ============================================================================= +# CASE 20: R18 — rm -rf .flow/ does not affect GLOSSARY.md +# ============================================================================= +echo -e "${YELLOW}--- Case 20: R18 (.flow/ removal preserves glossaries) ---${NC}" +# Initialize .flow/ first so there's something to nuke. +( cd "$REPO" && "$FLOWCTL" init --json >/dev/null ) +[[ -d "$REPO/.flow" ]] && ok "Case 20: .flow/ exists pre-test" || fail "Case 20: .flow/ init failed" + +# Phase 1: write entries +( cd "$REPO" && "$FLOWCTL" glossary add "PreNuke" --definition "Defined before .flow/ removal." 
--json >/dev/null ) + +ROOT_HASH_BEFORE="$( "$PYTHON_BIN" -c 'import hashlib,sys; print(hashlib.sha256(open(sys.argv[1],"rb").read()).hexdigest())' "$REPO/GLOSSARY.md" )" +SUB_HASH_BEFORE="$( "$PYTHON_BIN" -c 'import hashlib,sys; print(hashlib.sha256(open(sys.argv[1],"rb").read()).hexdigest())' "$SUB/GLOSSARY.md" )" + +# rm -rf .flow/ — using find to avoid the global rm safety habit, but the +# point is to simulate flow-next uninstall. +find "$REPO/.flow" -depth -type f -exec rm -f {} \; 2>/dev/null || true +find "$REPO/.flow" -depth -type d -exec rmdir {} \; 2>/dev/null || true +[[ ! -d "$REPO/.flow" ]] && ok "Case 20: .flow/ removed" || fail "Case 20: .flow/ still present" + +# Both glossaries must survive byte-for-byte. +ROOT_HASH_AFTER="$( "$PYTHON_BIN" -c 'import hashlib,sys; print(hashlib.sha256(open(sys.argv[1],"rb").read()).hexdigest())' "$REPO/GLOSSARY.md" )" +SUB_HASH_AFTER="$( "$PYTHON_BIN" -c 'import hashlib,sys; print(hashlib.sha256(open(sys.argv[1],"rb").read()).hexdigest())' "$SUB/GLOSSARY.md" )" +[[ "$ROOT_HASH_BEFORE" == "$ROOT_HASH_AFTER" ]] \ + && ok "Case 20: root GLOSSARY.md byte-identical after .flow/ removal" \ + || fail "Case 20: root GLOSSARY.md changed" +[[ "$SUB_HASH_BEFORE" == "$SUB_HASH_AFTER" ]] \ + && ok "Case 20: subdir GLOSSARY.md byte-identical after .flow/ removal" \ + || fail "Case 20: subdir GLOSSARY.md changed" + +# Phase 2: write more after removal — must still work without .flow/. +# PostNuke is a new term name so action=created (we're updating the file +# in place, not creating a new file — see entry_count for the file-level +# assertion). +( cd "$REPO" && "$FLOWCTL" glossary add "PostNuke" --definition "Defined after .flow/ removal." 
--json > "$TEST_DIR/case20-postnuke.json" ) +assert_eq_jq "$TEST_DIR/case20-postnuke.json" "d['action']" "created" \ + "Case 20: glossary add succeeds after .flow/ removal (new term)" + +# Verify the existing entries (PreNuke + Worker + Receipt + Anchor) are +# still in the file by counting entries pre/post-PostNuke. +NUKE_COUNT="$(json_get "$TEST_DIR/case20-postnuke.json" "d['entry_count']")" +[[ "$NUKE_COUNT" -ge 5 ]] \ + && ok "Case 20: existing entries preserved across .flow/ removal (entry_count=$NUKE_COUNT ≥ 5)" \ + || fail "Case 20: entries lost after .flow/ removal (entry_count=$NUKE_COUNT)" + +# ============================================================================= +# CASE 21: R4 — no GLOSSARY-MAP.md anywhere in the repo +# ============================================================================= +echo -e "${YELLOW}--- Case 21: R4 (no meta-file) ---${NC}" +META_HITS="$(find "$REPO" -name 'GLOSSARY-MAP.md' -o -name 'glossary-map.md' 2>/dev/null | wc -l | tr -d ' ')" +[[ "$META_HITS" == "0" ]] \ + && ok "Case 21: no GLOSSARY-MAP.md anywhere in test repo" \ + || fail "Case 21: $META_HITS meta-file(s) found" + +# Also check the actual codebase (PLUGIN_ROOT is the canonical source). 
+SRC_META_HITS="$(find "$PLUGIN_ROOT" -name 'GLOSSARY-MAP.md' -o -name 'glossary-map.md' 2>/dev/null | wc -l | tr -d ' ')" +[[ "$SRC_META_HITS" == "0" ]] \ + && ok "Case 21: no meta-file in plugin source either" \ + || fail "Case 21: $SRC_META_HITS meta-file(s) leaked into plugin source" + +# ============================================================================= +# CASE 22: R17 — no DDD jargon in flowctl glossary help text +# ============================================================================= +echo -e "${YELLOW}--- Case 22: R17 (no DDD jargon in help) ---${NC}" +HELP_TEXT="$( "$FLOWCTL" glossary --help 2>&1; \ + "$FLOWCTL" glossary add --help 2>&1; \ + "$FLOWCTL" glossary list --help 2>&1; \ + "$FLOWCTL" glossary read --help 2>&1; \ + "$FLOWCTL" glossary remove --help 2>&1 )" +for jargon in "ubiquitous language" "bounded context" "domain expert" "aggregate root"; do + if printf '%s\n' "$HELP_TEXT" | grep -qiF -- "$jargon"; then + fail "Case 22: DDD jargon '$jargon' found in help text" + else + ok "Case 22: '$jargon' absent from help text" + fi +done + +# ============================================================================= +# CASE 23: R15 — rendered file is human-readable markdown (H2 per term) +# ============================================================================= +echo -e "${YELLOW}--- Case 23: R15 (human-readable markdown) ---${NC}" +ROOT_CONTENT="$(cat "$REPO/GLOSSARY.md")" +H2_COUNT="$(printf '%s\n' "$ROOT_CONTENT" | grep -cE '^## ')" +H1_COUNT="$(printf '%s\n' "$ROOT_CONTENT" | grep -cE '^# ')" +[[ "$H1_COUNT" -ge 1 ]] && ok "Case 23: at least one H1 (Glossary husk header)" \ + || fail "Case 23: missing H1 husk header" +[[ "$H2_COUNT" -ge 4 ]] && ok "Case 23: H2-per-term ($H2_COUNT terms)" \ + || fail "Case 23: expected ≥4 H2 headings, got $H2_COUNT" + +# Check it's markdown, not YAML/JSON. 
+if printf '%s\n' "$ROOT_CONTENT" | head -1 | grep -qE '^---|^\{'; then + fail "Case 23: file looks like YAML/JSON, not markdown" +else + ok "Case 23: file is markdown (no YAML frontmatter, no JSON)" +fi + +# ============================================================================= +# CASE 24: list --json shape +# ============================================================================= +echo -e "${YELLOW}--- Case 24: list --json shape ---${NC}" +LIST24="$TEST_DIR/case24-list.json" +( cd "$REPO" && "$FLOWCTL" glossary list --json > "$LIST24" ) +for key in groups file_count total_terms; do + if "$PYTHON_BIN" -c "import json,sys; sys.exit(0 if '$key' in json.load(open('$LIST24')) else 1)"; then + ok "Case 24: JSON has key '$key'" + else + fail "Case 24: JSON missing key '$key'" + fi +done +# Check group shape +for key in path entries count; do + if "$PYTHON_BIN" -c "import json,sys; d=json.load(open('$LIST24')); sys.exit(0 if '$key' in d['groups'][0] else 1)"; then + ok "Case 24: groups[0] has key '$key'" + else + fail "Case 24: groups[0] missing key '$key'" + fi +done + +# ============================================================================= +# CASE 25: read --json shape +# ============================================================================= +echo -e "${YELLOW}--- Case 25: read --json shape ---${NC}" +READ25="$TEST_DIR/case25-read.json" +( cd "$REPO" && "$FLOWCTL" glossary read "Receipt" --json > "$READ25" ) +for key in path term definition avoid relates_to; do + if "$PYTHON_BIN" -c "import json,sys; sys.exit(0 if '$key' in json.load(open('$READ25')) else 1)"; then + ok "Case 25: JSON has key '$key'" + else + fail "Case 25: JSON missing key '$key'" + fi +done + +# ============================================================================= +# Summary +# ============================================================================= +echo +echo -e "${YELLOW}=== Summary ===${NC}" +echo -e "${GREEN}PASS: $PASS${NC}" +echo -e "${RED}FAIL: 
$FAIL${NC}" + +if [[ "$FAIL" -gt 0 ]]; then + exit 1 +fi +exit 0 diff --git a/plugins/flow-next/scripts/smoke_test.sh b/plugins/flow-next/scripts/smoke_test.sh index ad8a2622..7cf2d9d5 100755 --- a/plugins/flow-next/scripts/smoke_test.sh +++ b/plugins/flow-next/scripts/smoke_test.sh @@ -436,14 +436,24 @@ else FAIL=$((FAIL + 1)) fi -# All 8 bug categories + 5 knowledge categories present, each with .gitkeep. +# All 8 bug categories + 6 knowledge categories present, each with .gitkeep. +# Knowledge: architecture-patterns, conventions, tooling-decisions, workflow, +# best-practices, decisions (fn-38 T1 added the last). bug_count=$(find .flow/memory/bug -mindepth 2 -name .gitkeep 2>/dev/null | wc -l | tr -d ' ') kn_count=$(find .flow/memory/knowledge -mindepth 2 -name .gitkeep 2>/dev/null | wc -l | tr -d ' ') -if [[ "$bug_count" == "8" && "$kn_count" == "5" ]]; then - echo -e "${GREEN}✓${NC} memory init creates 8 bug + 5 knowledge .gitkeep placeholders" +if [[ "$bug_count" == "8" && "$kn_count" == "6" ]]; then + echo -e "${GREEN}✓${NC} memory init creates 8 bug + 6 knowledge .gitkeep placeholders" PASS=$((PASS + 1)) else - echo -e "${RED}✗${NC} memory init placeholders (bug=$bug_count expected 8, knowledge=$kn_count expected 5)" + echo -e "${RED}✗${NC} memory init placeholders (bug=$bug_count expected 8, knowledge=$kn_count expected 6)" + FAIL=$((FAIL + 1)) +fi +# fn-38 T1 explicit lazy-dir-create assertion: decisions/.gitkeep must exist. 
+if [[ -f ".flow/memory/knowledge/decisions/.gitkeep" ]]; then + echo -e "${GREEN}✓${NC} memory init lazy-creates knowledge/decisions/.gitkeep" + PASS=$((PASS + 1)) +else + echo -e "${RED}✗${NC} memory init missing knowledge/decisions/.gitkeep" FAIL=$((FAIL + 1)) fi diff --git a/plugins/flow-next/skills/flow-next-audit/SKILL.md b/plugins/flow-next/skills/flow-next-audit/SKILL.md index a392dee9..1f03417b 100644 --- a/plugins/flow-next/skills/flow-next-audit/SKILL.md +++ b/plugins/flow-next/skills/flow-next-audit/SKILL.md @@ -11,6 +11,8 @@ Memory entries decay. A `.flow/memory/bug/runtime-errors/` entry logged six mont This skill IS the audit. The host agent (Claude Code / Codex / Droid) walks `.flow/memory/`, reads each entry, uses Read/Grep/Glob/git to verify references against the current codebase, applies engineering judgment, and decides per entry whether to **Keep / Update / Consolidate / Replace / Delete**. Optional autofix mode applies unambiguous actions and marks ambiguous as stale. +Decision entries (`.flow/memory/knowledge/decisions/`) and glossary terms (`GLOSSARY.md` files at the repo root and on the ancestor chain) are walked alongside the rest of memory. Decisions get a calibrated judging question — "does the constraint that motivated this choice still hold?" — and Replace becomes a two-step supersession (write successor, mark old `decision_status: superseded`, never `git rm`). Glossary terms are scanned for code usage: zero-hit terms get a `<!-- stale: ... -->` HTML comment via Edit tool (no `flowctl glossary mark-stale` exists), and `_Avoid_` aliases that appear in code are surfaced as alias-creep findings. + There is no Python audit-engine, no codex/copilot subprocess dispatch, no deterministic scorer. The host agent is already an LLM and does the work directly. flowctl provides only thin persistence plumbing (`memory mark-stale`, `memory mark-fresh`, `memory search --status`) — landed by Task 2 of this epic. 
**Read [workflow.md](workflow.md) for the full phase-by-phase execution. Read [phases.md](phases.md) for the 5-outcomes lookup with memory-schema-specific calibration.** @@ -72,7 +74,10 @@ The goal is automated maintenance with human oversight on judgment calls — not - **Auditing legacy flat files** (`.flow/memory/pitfalls.md`, `conventions.md`, `decisions.md` at the memory root). Skip with a warning that recommends `/flow-next:memory-migrate` first. Report includes the skipped count. - **Auditing under `_audit/`, `_review/`, or any other `_*` directory** under `.flow/memory/`. - **Deleting silently.** Delete is reserved for unambiguous cases (code gone AND problem domain gone). Default to Replace or Consolidate when there's still value to preserve. -- **Inventing flowctl subcommands** beyond what Task 2 ships (`memory mark-stale`, `memory mark-fresh`, `memory search --status`). Use Write tool + git for moves and deletes. +- **`git rm` on superseded decision entries.** Decision history stays on disk. Replace for `knowledge/decisions/` entries means write a new entry and mark the old `decision_status: superseded` with `superseded_by: <new-id>` — never delete the old file. +- **Deleting glossary terms.** When a term has zero code hits, mark stale via Edit-tool HTML comment. Removing the term entry is the operator's call, surfaced in the report. +- **Inventing flowctl subcommands** beyond what fn-34 task 2 ships (`memory mark-stale`, `memory mark-fresh`, `memory search --status`). fn-38 task 2 ships only `glossary {add,list,read,remove}` — there is no `flowctl glossary mark-stale`; use Edit tool. Use Write tool + git for moves and deletes. +- **Mass-renaming code from a glossary alias-creep finding.** The audit reports file:line locations and stops there; code rename is the operator's call. - **Auto-committing without user awareness in interactive mode.** Phase 5 detects git context and asks. Autofix uses sensible defaults. 
- **Setting `context: fork`** — blocking-question tools must stay reachable. - **Running parallel replacement subagents.** Investigation subagents can run in parallel for 3+ independent entries; replacement subagents run sequentially to protect orchestrator context. @@ -97,13 +102,14 @@ fi Execute the phases in [workflow.md](workflow.md) in order: -0. **Discover & Triage** — walk `.flow/memory/{bug,knowledge}/<category>/`, group by module / category, count, choose interaction path (focused / batch / broad), skip legacy + `_*` directories with a counted warning. -1. **Investigate** — per entry: read frontmatter + body, verify referenced files / symbols / modules against current code via Read / Grep / Glob, check git log in the area, form Keep / Update / Consolidate / Replace / Delete recommendation with 2-4 evidence bullets and confidence. For 3+ independent entries, dispatch parallel investigation subagents (read-only). +0. **Discover & Triage** — walk `.flow/memory/{bug,knowledge}/<category>/`, group by module / category, count, choose interaction path (focused / batch / broad), skip legacy + `_*` directories with a counted warning. `knowledge/decisions/` entries are picked up automatically by the same glob. +0.5 **Glossary scan** — enumerate `GLOSSARY.md` files via `flowctl glossary list --json`; per term, grep tracked code for the term and each `_Avoid_` alias (case-insensitive whole-word, normalized whitespace); zero hits + zero alias hits → mark stale via Edit tool (HTML comment after the term heading); alias hits → surface as alias-creep finding for Phase 3 (interactive) or report (autofix); skip husk files (`count: 0`) with a single advisory. +1. **Investigate** — per entry: read frontmatter + body, verify referenced files / symbols / modules against current code via Read / Grep / Glob, check git log in the area, form Keep / Update / Consolidate / Replace / Delete recommendation with 2-4 evidence bullets and confidence. 
For 3+ independent entries, dispatch parallel investigation subagents (read-only). Decision entries use the calibrated judging question — "does the constraint still hold?" — see [phases.md](phases.md) §Decision-entry calibration. 1.75 **Cross-doc analysis** — compare entries sharing module / category for overlap (problem, solution, root cause, files), supersession (newer canonical entry covers older narrower precursor), contradictions. -2. **Classify** — apply [phases.md](phases.md) decision criteria. For Replace, verify evidence is sufficient to write a trustworthy successor; mark stale otherwise. -3. **Ask** — interactive only; autofix skips. Group obvious Keeps + Updates → confirm batch. Present Consolidate / Replace / non-auto-Delete individually. Lead with recommendation. One question at a time. -4. **Execute** — Keep: no edit. Update: agent edits frontmatter / body via Write tool, preserving unknown fields. Consolidate: merge unique content into canonical, `git rm` subsumed. Replace: write new entry, `git rm` old. Delete: `git rm` (only when code AND problem domain both gone). Ambiguous in autofix: `flowctl memory mark-stale`. -5. **Report + Commit** — print Kept / Updated / Consolidated / Replaced / Deleted / Marked-stale / Skipped counts plus per-entry detail. Detect git context (current branch, dirty tree). Interactive: ask commit options. Autofix: branch-and-PR on main, commit on feature branch, stage only audit-modified files. +2. **Classify** — apply [phases.md](phases.md) decision criteria. For Replace, verify evidence is sufficient to write a trustworthy successor; mark stale otherwise. For decision entries, Replace = supersede (write new entry; mark old `decision_status: superseded`, `superseded_by: <new-id>`; never `git rm` the old). +3. **Ask** — interactive only; autofix skips. Group obvious Keeps + Updates → confirm batch. Present Consolidate / Replace / non-auto-Delete individually. Surface glossary alias-creep findings per alias. 
Lead with recommendation. One question at a time. +4. **Execute** — Keep: no edit. Update: agent edits frontmatter / body via Write tool, preserving unknown fields. Consolidate: merge unique content into canonical, `git rm` subsumed. Replace: write new entry, `git rm` old (decisions: write new + edit old's frontmatter to mark superseded, never `git rm`). Delete: `git rm` (only when code AND problem domain both gone). Glossary stale: Edit comment after term heading. Ambiguous in autofix: `flowctl memory mark-stale`. +5. **Report + Commit** — print Kept / Updated / Consolidated / Replaced / Deleted / Marked-stale / Skipped counts plus per-entry detail and a Glossary section (Kept / Marked stale / Alias-creep / Husks). Detect git context (current branch, dirty tree). Interactive: ask commit options. Autofix: branch-and-PR on main, commit on feature branch, stage only audit-modified files. 6. **Discoverability check** — verify the substantive CLAUDE.md / AGENTS.md (the one not just `@`-including the other) mentions `.flow/memory/` with schema basics (track / category / module / tags / status) and when to consult. Add a minimal line if missing — interactive asks consent, autofix surfaces as recommendation. ## Output rules @@ -124,8 +130,16 @@ Consolidated: C Replaced: Z Deleted: W Marked stale: S + +Glossary +-------- +Files scanned: F (H husks) +Terms scanned: T +Kept: K_g +Marked stale: S_g +Alias-creep flagged: A_g ``` -Then per-entry detail (id, classification, evidence, action taken). For Consolidate: which entry was canonical, what unique content was merged, what was deleted. For Replace: what the old entry recommended vs what current code does, path to successor. For Marked stale: why ambiguous. +Then per-entry detail (id, classification, evidence, action taken). For Consolidate: which entry was canonical, what unique content was merged, what was deleted. 
For Replace: what the old entry recommended vs what current code does, path to successor (decision Replace also notes the old entry now carries `decision_status: superseded`). For Marked stale: why ambiguous. For glossary terms: only stale + alias-creep cases get per-term lines (Keep is silent); husks get a one-line advisory each. Autofix mode splits actions into **Applied** (writes succeeded) and **Recommended** (writes failed — e.g. permission denied). The structure is the same; only the bucket differs. diff --git a/plugins/flow-next/skills/flow-next-audit/phases.md b/plugins/flow-next/skills/flow-next-audit/phases.md index 7ca1a542..2c4acad6 100644 --- a/plugins/flow-next/skills/flow-next-audit/phases.md +++ b/plugins/flow-next/skills/flow-next-audit/phases.md @@ -12,6 +12,8 @@ For each entry, classify into exactly one outcome. Calibration below is specific For **autofix mode** ambiguity: mark as stale via `flowctl memory mark-stale` instead of guessing. +The 5 outcomes apply to every categorized entry, including the `knowledge/decisions/` category (fn-38 schema extension). Decision entries reuse the same classifier with a tighter judging question and a different shape for `Replace` — see the [Decision-entry calibration](#decision-entry-calibration) section below. + --- ## Keep @@ -227,6 +229,81 @@ That's it. No archive directory, no metadata flag. Git history preserves the fil --- +## Decision-entry calibration + +Entries under `knowledge/decisions/` (fn-38 schema) document forward-looking choices: the project picked approach X, considered Y and Z, and committed to a constraint. The 5 outcomes still apply, but the per-entry judging question changes — and `Replace` means **supersede**, not rewrite-in-place. + +### Per-entry judging question + +For non-decision entries, Phase 1 asks "is this still relevant?". 
For decision entries, ask: + +> **Does the constraint that motivated this decision still hold?** + +The constraint is whatever made the decision hard-to-reverse, surprising-without-context, and a real trade-off when it was made. If the constraint is still in force, the decision is still active. If the constraint has dissolved (the trade-off no longer exists, the surprising context is now the obvious default, the codebase changed shape so reversal is now cheap), the decision is a candidate for supersession. + +### Decision-specific frontmatter + +Decision entries may carry these optional fields (see `MEMORY_DECISION_FIELDS` in `flowctl.py`): + +- `decision_status`: one of `proposed`, `accepted`, `superseded` (`MEMORY_DECISION_STATUSES`) +- `superseded_by`: id of the successor entry that replaced this one +- `alternatives_considered`: list of options that were rejected when the decision was made + +When auditing, treat `decision_status: superseded` as already-handled — the entry is historical record. Audit the `superseded_by` target instead. If `superseded_by` points at a missing entry, that's an Update (broken cross-reference) on this entry. 
+ +### Outcome calibration for decisions + +| Outcome | Meaning for a decision entry | Action | +|---------|------------------------------|--------| +| **Keep** | Constraint still holds; rejected alternatives are still rejected for the same reasons | No edit | +| **Update** | Constraint holds; only references / `alternatives_considered` text / cross-refs drifted | Edit in place; `decision_status` unchanged | +| **Consolidate** | Two decision entries cover the same choice (rare — usually means a rushed double-write) | Merge into canonical, `git rm` subsumed | +| **Replace** | Constraint no longer holds; a different choice is now in force | **Supersede** — see flow below | +| **Delete** | The entire problem area is gone (the system that needed the decision was removed) | `git rm` (prefer Replace + supersede when problem domain still exists) | + +### Replace = supersede + +For non-decision entries, `Replace` means write a successor and `git rm` the old. For decision entries, the old entry stays — it's part of the history of why the project arrived where it is. Replace becomes a two-step supersession: + +1. **Write the new decision entry** — a fresh `knowledge/decisions/<slug>-<date>.md` describing the current choice, what changed in the constraint, and why the prior decision no longer applies. Optionally include `alternatives_considered` listing both the original alternatives and the prior decision itself (now also rejected). Include `related_to: [<old-id>]` for traceability. +2. **Mark the old entry superseded** — Edit the old entry's frontmatter to set `decision_status: superseded` and `superseded_by: <new-entry-id>`. Body untouched. Do **not** `git rm` — the historical record stays on disk. + +When autofix evidence is insufficient to write the successor decision (the constraint clearly dissolved but the new approach is too unstable to commit to), mark the old entry stale via `flowctl memory mark-stale` instead of half-shipping a supersession. 
The user (or a follow-up audit) can revisit when the new approach has settled. + +### Edge cases + +- A decision whose `decision_status` is `proposed` but never reached `accepted` (the project never committed) → if no code reflects the proposal, classify Delete; if partial implementation exists, mark stale and surface in the report. +- A decision that references a constraint visible only in external context (a contract, a partner integration, a regulatory rule) → audit cannot verify the constraint from code alone. Skip with a "cannot mechanically verify" note in the report; do not auto-Delete. +- A decision pointing at `superseded_by: <id>` where the successor itself is now superseded → walk the chain; the audit target is the head of the chain. + +--- + +## Glossary scan (parallel to memory audit) + +Glossary terms are not memory entries — they live in `GLOSSARY.md` files at the repo root and (optionally) under subdirectories. The audit walks them in [Phase 0.5](workflow.md) of the workflow. The 5-outcomes table doesn't apply directly; the per-term decisions are simpler: + +| Outcome | Meaning for a glossary term | Action | +|---------|-----------------------------|--------| +| **Keep** | Term has hits in tracked code (case-insensitive whole-word match) | No edit | +| **Mark stale** | Zero hits for the term AND zero hits for any `_Avoid_` alias | Edit tool: append `<!-- stale: <reason> -->` HTML comment after the term heading | +| **Alias-creep** | An `_Avoid_` alias has hits in code | Phase 3 question (interactive) or stale-flag note (autofix) — propose renaming code uses to the canonical term, or moving the alias out of `_Avoid_` | + +There is no `flowctl glossary mark-stale` subcommand. Stale-marking is an Edit-tool operation only. The agent must **never delete** the term entry on stale-detection — deletion is the operator's call, surfaced as a recommendation in the report. 
+ +### Husk awareness + +A glossary file with `count: 0` from `flowctl glossary list --json` is a husk — `# Glossary` H1 with no terms after the last term was removed. Husks have no terms to audit; skip the walk for that file and surface a single advisory in Phase 5: + +``` +GLOSSARY.md at <path> is an empty husk (no terms defined). +Remove the file manually if it's no longer needed; flow-next keeps it as +project state per fn-38 R18. +``` + +The audit never deletes the file. Removing it is a project decision, not a memory-audit decision. + +--- + ## Mark stale (autofix ambiguous + Replace-insufficient) **Not** one of the 5 outcomes — it's the autofix-mode escape hatch and the Replace-insufficient-evidence fallback. Surface in the report under "Marked stale" with the reason. @@ -259,6 +336,12 @@ Re-mark-stale on an already-stale entry updates `last_audited` + `audit_notes`. ## Decision tree (quick reference) ``` +Is the entry under knowledge/decisions/? + yes → use the Decision-entry calibration block above + (judging question = "does the constraint still hold?"; + Replace = supersede, not git rm) + no → continue with the standard tree below + Is the entry's referenced code AND problem domain both gone? yes → Delete (auto-applicable when ALL auto-Delete criteria hold) no → continue @@ -279,3 +362,5 @@ Are there reference drifts (paths, modules, links, snippets)? ``` In autofix mode, replace any "ask user" branch with mark-stale. + +For glossary terms (separate from memory entries — see [Glossary scan](#glossary-scan-parallel-to-memory-audit) above): the tree is `code-hit? → Keep`; `no code-hit AND no alias-hit? → mark stale via Edit tool`; `alias hit in code? → Phase 3 question (interactive) or stale-flag note (autofix)`. 
diff --git a/plugins/flow-next/skills/flow-next-audit/workflow.md b/plugins/flow-next/skills/flow-next-audit/workflow.md index 50611ee7..13e511db 100644 --- a/plugins/flow-next/skills/flow-next-audit/workflow.md +++ b/plugins/flow-next/skills/flow-next-audit/workflow.md @@ -41,6 +41,8 @@ For each kept path, read the frontmatter (parser pattern from `prospect/workflow If the entry's `status` is `stale` already, surface it in the report under "Already stale" and skip investigation in autofix mode (mark-stale is idempotent — re-marking adds noise). In interactive mode, offer to refresh-investigate (rare path; user-driven). +**Decisions are auto-walked.** `MEMORY_CATEGORIES["knowledge"]` includes `decisions` (fn-38 schema extension), so the glob in §0.1 picks up `.flow/memory/knowledge/decisions/*.md` automatically — no separate phase. Decision entries get a calibrated judging question and a different `Replace` shape; see [phases.md](phases.md) §Decision-entry calibration. Decision-specific frontmatter (`decision_status`, `superseded_by`, `alternatives_considered`) is captured into the entry record for Phase 1 to use; entries with `decision_status: superseded` are surfaced as historical record and skipped (the audit target is the successor, not the superseded entry). + ### 0.2 — Detect legacy flat files ```bash @@ -128,6 +130,155 @@ Options: --- +## Phase 0.5: Glossary scan + +**Goal:** for every glossary file on the ancestor chain, verify each term has at least one usage in tracked code (term itself or any `_Avoid_` alias). Mark stale on absence; surface alias-creep as a Phase 3 signal. + +This phase runs conceptually in parallel with the memory walk — same audit invocation, separate scope. Glossary files are project state (not flow-next bookkeeping; see fn-38 R18). Skip the phase entirely when `flowctl glossary list --json` reports zero files. 
+ +### 0.5.1 — Enumerate glossaries + +Use the flowctl helper as the single source of truth: + +```bash +GLOSSARY_JSON="$("$FLOWCTL" glossary list --json 2>/dev/null || echo '{"groups":[],"file_count":0,"total_terms":0}')" +``` + +JSON shape (fn-38 task 2): + +```json +{ + "groups": [ + { + "path": "/abs/path/GLOSSARY.md", + "entries": [ + { + "term": "<canonical>", + "definition": "<one-line>", + "avoid": ["<alias-1>", "<alias-2>"], + "relates_to": ["<other-term>"] + } + ], + "count": 1 + } + ], + "file_count": 1, + "total_terms": 1 +} +``` + +When `file_count == 0`, skip Phase 0.5 entirely. When `total_terms == 0` but `file_count > 0`, every group is a husk (see §0.5.4). + +### 0.5.2 — Per-term code search + +For each `(group, entry)` where `count > 0`: + +1. **Build the search corpus** — tracked source files only. Use `git ls-files` to honor `.gitignore`; exclude `.flow/`, the glossary file itself, and known build artifacts: + + ```bash + git -C "$REPO_ROOT" ls-files -z \ + | grep -zvE '^\.flow/|/GLOSSARY\.md$|^GLOSSARY\.md$|/node_modules/|/\.git/' \ + > /tmp/glossary-corpus.zlist + ``` + + On platforms where Bash file ops are gated behind permissions, the host agent should fall back to Glob with the equivalent exclusion pattern. + +2. **Search for the term** — case-insensitive, whole-word match (matches T2's `_glossary_term_matches` invariant). Normalize whitespace in the term first (collapse runs of whitespace to a single space), then match it as a fixed string on word boundaries (`grep -F -w`) so regex metacharacters in a term (`+`, `(`, `|`, …) need no escaping: + + ```bash + TERM_NORM="$(printf '%s' "$term" | tr -s '[:space:]' ' ')" + TERM_HITS=$(xargs -0 grep -liFw -- "$TERM_NORM" \ + < /tmp/glossary-corpus.zlist 2>/dev/null | wc -l | tr -d ' ') + ``` + + The agent may also use the Grep tool directly with an equivalent pattern; either path is fine. + +3. **Search for each `_Avoid_` alias** — same matching rule. Aggregate alias hits per-alias so the report can name the offending alias. + +4. 
**Decide:** + + | Term hits | Any alias hits | Outcome | + |-----------|----------------|---------| + | ≥1 | 0 | **Keep** — record reviewed-without-change | + | 0 | 0 | **Mark stale** — Edit tool, append HTML comment after the term heading | + | 0 | ≥1 | **Mark stale + alias-creep flag** — same Edit, plus surface to Phase 3 (interactive) or report (autofix) | + | ≥1 | ≥1 | **Alias-creep flag only** — term is alive but an alias is being used in code; do not mark stale | + +### 0.5.3 — Stale-marking via Edit tool + +There is no `flowctl glossary mark-stale` subcommand. fn-38 task 2 shipped only `add / list / read / remove`; stale-marking is an Edit-tool operation on the glossary file directly. + +The Edit appends an HTML comment immediately after the term heading line (preserves the body untouched, never deletes the entry). The comment lives between the heading and the definition paragraph so a casual reader sees it and `flowctl glossary list` still parses cleanly: + +```text +## <Term> + +<!-- stale: zero hits in tracked code on <YYYY-MM-DD> (audited-by: /flow-next:audit) --> + +<one-line definition> + +_Avoid_: alias-1, alias-2 +``` + +Idempotency: when the heading already has a `<!-- stale: ... -->` comment immediately following, replace the comment in place rather than stacking. Use `Edit` with `old_string` matching the existing comment line. + +**The agent must not delete the term entry on stale-detection.** Deletion is the operator's call. The audit surfaces it as a Phase 5 recommendation: + +``` +Recommended manual review: GLOSSARY.md term "<term>" has no code hits. +Stale comment added; consider `flowctl glossary remove <term>` if the concept is gone. +``` + +### 0.5.4 — Husk awareness + +A glossary file with `count: 0` (the file is `# Glossary` H1 followed by no term entries — left intact after the last term was removed; see fn-38 task 2 R18) skips the per-term walk. 
Surface a single Phase 5 advisory per husk: + +``` +GLOSSARY.md at <relative path> is an empty husk (no terms defined). +flow-next keeps it as project state per fn-38 R18 — remove it manually if no +longer needed. +``` + +The audit never deletes the file. + +### 0.5.5 — Alias-creep handling + +When a term has alias hits in code (whether or not the canonical term also has hits): + +- **Interactive (Phase 3):** present per alias as a question. Lead with the recommendation: + + ``` + Glossary term: "<term>" (defined in <relative path>) + _Avoid_ alias "<alias>" appears in tracked code at <file:line> (and N other locations). + + Options: + 1. Rename the code uses to "<term>" (recommended) + 2. Drop "<alias>" from the _Avoid_ list (alias is now acceptable) + 3. Skip — surface in report only + ``` + + Option 1 is a code-edit recommendation only — the audit reports the locations; the operator handles the rename. (Mass-renaming code from a memory audit is out of scope.) + Option 2 is an Edit on the glossary file: remove the alias from the `_Avoid_` list while preserving the rest of the entry. + +- **Autofix:** never auto-rename code. Surface the alias-creep finding in the report under "Recommended" with file:line locations. The agent does not Edit the glossary unless the term itself is also stale (in which case the stale comment captures the alias-creep too). + +### 0.5.6 — Carry into Phase 5 report + +Capture the per-term outcomes into a glossary section of the report (see §5.1 below). Counts: + +- `glossary_kept` — terms with code hits. +- `glossary_marked_stale` — terms with zero code hits and zero alias hits, stale comment applied. +- `glossary_alias_creep` — terms whose `_Avoid_` aliases hit code (regardless of canonical hit count). +- `glossary_husks` — files with `count: 0`. + +### Done when + +- Every glossary group with `count > 0` has every term decided (Keep / mark stale / alias-creep). +- Every husk file has a queued advisory. 
+- The orchestrator has a glossary-side decision map alongside the memory-side investigation map. + +--- + ## Phase 1: Investigate (per entry) **Goal:** for each entry in scope, verify its claims against the current codebase and form a recommendation with evidence. @@ -416,6 +567,25 @@ When evidence is insufficient: 2. Report what evidence was found and what's missing. 3. Recommend the user run a domain-specific solve afterwards to capture fresh context. +**Replace flow for `knowledge/decisions/` entries** — the old entry is **not** `git rm`'d. Decision history stays on disk. Two-step supersession: + +1. Subagent (or orchestrator on the main thread for short successors) writes the new decision entry under `.flow/memory/knowledge/decisions/<slug>-<date>.md`. Include `related_to: [<old-id>]` and, when known, `alternatives_considered` listing both the original alternatives and the prior decision (now also rejected). +2. Orchestrator edits the old entry's frontmatter via Write tool: set `decision_status: superseded` and `superseded_by: <new-entry-id>`. Body untouched. Other frontmatter fields preserved (round-trip rules from §4.2 apply). + +Insufficient evidence on a decision Replace routes to mark-stale on the old entry — same path as non-decision Replace, but the operator's follow-up is "draft the new decision when the constraint settles" rather than "research the new code shape." + +### 4.4.1 — Glossary stale-marking (Phase 0.5 outcomes) + +For each glossary term flagged "Mark stale" in Phase 0.5, the orchestrator applies the Edit on the main thread (no subagent — short, focused edits): + +1. Open the glossary file via Read. +2. Edit the line immediately after the `## <Term>` heading. If a `<!-- stale: ... -->` comment already exists there, replace it (idempotent re-mark). Otherwise insert it as a new line above the definition paragraph. +3. The comment text is `<!-- stale: zero hits in tracked code on <YYYY-MM-DD> (audited-by: /flow-next:audit) -->`. 
+ +Glossary edits stage in the same git context as memory edits (Phase 5 picks the commit strategy uniformly across both). + +For alias-creep findings without a stale-flag (term has hits, but `_Avoid_` alias also has hits), the orchestrator does **not** edit the glossary in autofix mode. Interactive mode may edit only if the user picks "Drop the alias from `_Avoid_`" in Phase 3. Code renames are out of scope — the audit reports file:line locations and stops there. + ### 4.5 — Delete flow ```bash @@ -465,6 +635,14 @@ Replaced: <Z> Deleted: <W> Marked stale: <S> Skipped (no decision): <U> + +Glossary +-------- +Files scanned: <file_count> (<husk_count> husks) +Terms scanned: <total_terms> +Kept: <glossary_kept> +Marked stale: <glossary_marked_stale> +Alias-creep flagged: <glossary_alias_creep> ``` Then per-entry detail (one block each): @@ -478,10 +656,23 @@ Then per-entry detail (one block each): Action: <what was done — file edits, deletions, mark-stale calls> [Consolidate only] Canonical: <entry_id>; merged: [<list>]; deleted: [<list>] [Replace only] Old guidance: <one-line>; New entry: <new_id> + [Decision Replace] Successor: <new_id>; old marked decision_status=superseded (NOT git-rm'd) ``` For **Keep** outcomes, group under a "Reviewed without edits" subsection so the result is visible without git churn. +Then per-glossary-term detail (only for stale + alias-creep cases — Keep is silent): + +``` +- <relative-path>:<term> + Outcome: <Marked stale|Alias-creep|Marked stale + alias-creep> + Term hits: <N> + Alias hits: <alias-1>: <N1>, <alias-2>: <N2> + Action: <Edit applied|None — recommendation only> +``` + +Husk advisories (one per file with `count: 0`) follow under a "Glossary husks" subsection. + ### 5.2 — Autofix two-section split In autofix mode, split actions into: @@ -685,14 +876,15 @@ If step 6.4 produced an instruction-file edit AND Phase 5 already committed audi The skill itself is markdown — there's no unit-test surface. 
The validation is invoking `/flow-next:audit` in a real session. Expected behavior: -- Phase 0 walks `.flow/memory/`, lists per-cluster counts, reports legacy skip count if `pitfalls.md` etc. exist. +- Phase 0 walks `.flow/memory/`, lists per-cluster counts, reports legacy skip count if `pitfalls.md` etc. exist. Decision entries (`knowledge/decisions/`) are picked up automatically once the schema extension lands (fn-38 task 1). +- Phase 0.5 walks every `GLOSSARY.md` on the ancestor chain via `flowctl glossary list --json`, greps tracked code per-term + per-`_Avoid_` alias, marks zero-hit terms stale via Edit tool with `<!-- stale: ... -->`, surfaces alias-creep, advises on husks. - Phase 1 produces evidence per entry. For 3+ entries, parallel investigation subagents run. -- Phase 2 classifies; Replace candidates with insufficient evidence reclassify as mark-stale. -- Phase 3 (interactive) groups Keeps / Updates for batched confirmation; presents Consolidate / Replace / Delete individually via blocking-question tool. -- Phase 4 executes via Write / `flowctl memory mark-stale` / `git rm`. -- Phase 5 prints the report; offers commit options based on git context. +- Phase 2 classifies; Replace candidates with insufficient evidence reclassify as mark-stale. Decision entries use the calibrated judging question and the supersede shape for Replace. +- Phase 3 (interactive) groups Keeps / Updates for batched confirmation; presents Consolidate / Replace / Delete and glossary alias-creep individually via blocking-question tool. +- Phase 4 executes via Write / `flowctl memory mark-stale` / `git rm`. Decision Replace = supersede (write new + edit old's `decision_status` + `superseded_by`; never `git rm`). Glossary stale = Edit comment after term heading. +- Phase 5 prints the report (memory section + glossary section + husk advisories); offers commit options based on git context. 
- Phase 6 checks CLAUDE.md / AGENTS.md for `.flow/memory/` mention; offers minimal addition if missing. -In autofix mode (`/flow-next:audit mode:autofix`), Phase 3 is skipped, ambiguous entries are marked stale, and the report is the sole deliverable. +In autofix mode (`/flow-next:audit mode:autofix`), Phase 3 is skipped, ambiguous entries are marked stale, glossary alias-creep surfaces as a recommendation only, and the report is the sole deliverable. -If Phase 0 produces nothing (no categorized entries, only legacy), the skill exits cleanly with the legacy-skip count. +If Phase 0 produces nothing (no categorized entries, only legacy) AND Phase 0.5 produces nothing (no glossary files), the skill exits cleanly with the legacy-skip count. diff --git a/plugins/flow-next/skills/flow-next-interview/SKILL.md b/plugins/flow-next/skills/flow-next-interview/SKILL.md index d503f683..65d5844d 100644 --- a/plugins/flow-next/skills/flow-next-interview/SKILL.md +++ b/plugins/flow-next/skills/flow-next-interview/SKILL.md @@ -58,6 +58,53 @@ If empty, ask: "What should I interview you about? Give me a Flow ID (e.g., fn-1 FLOWCTL="${DROID_PLUGIN_ROOT:-${CLAUDE_PLUGIN_ROOT}}/scripts/flowctl" ``` +### Parse `--docs` / `--no-docs` flags + +Strip `--docs` / `--no-docs` from `$ARGUMENTS` before input-type detection so they don't get confused for a Flow ID or path: + +```bash +RAW_ARGS="$ARGUMENTS" +DOC_AWARE_FORCE="" # "" = autodetect, "on" = forced on, "off" = forced off +if [[ "$RAW_ARGS" == *"--no-docs"* ]]; then + DOC_AWARE_FORCE="off" + RAW_ARGS="${RAW_ARGS//--no-docs/}" +elif [[ "$RAW_ARGS" == *"--docs"* ]]; then + DOC_AWARE_FORCE="on" + RAW_ARGS="${RAW_ARGS//--docs/}" +fi +RAW_ARGS=$(printf "%s" "$RAW_ARGS" | tr -s ' ' | sed 's/^ //;s/ $//') +# RAW_ARGS now contains the Flow ID / file path / empty. +``` + +`--docs` and `--no-docs` are mutually exclusive; if the user passes both, `--no-docs` wins (the `if/elif` checks `--no-docs` first). 
The `--docs` token gets left in the residual `RAW_ARGS` after stripping, which surfaces downstream as an unrecognized argument — loud failure beats silent acceptance of conflicting state. + +### Doc-aware autodetect + +Decide whether doc-aware mode (behaviors a-d below) activates. Three paths: + +1. **Forced on** (`--docs` flag): `DOC_AWARE=1`. Lazy-creates root `GLOSSARY.md` on first term resolution via `flowctl glossary add` (writes to nearest-ancestor or repo root when no ancestor exists). +2. **Forced off** (`--no-docs` flag): `DOC_AWARE=0`. Skip behaviors a-d entirely, even if artifacts exist. +3. **Autodetect** (no flag): activate when `GLOSSARY.md` has at least one defined term OR any decision entry exists. + +```bash +DOC_AWARE=0 +if [[ "$DOC_AWARE_FORCE" == "on" ]]; then + DOC_AWARE=1 +elif [[ "$DOC_AWARE_FORCE" == "off" ]]; then + DOC_AWARE=0 +else + TERMS=$("$FLOWCTL" glossary list --json 2>/dev/null | jq -r '.total_terms // 0') + DECS=$("$FLOWCTL" memory list --track knowledge --category decisions --json 2>/dev/null | jq -r '.entries | length // 0') + if [[ "${TERMS:-0}" -gt 0 || "${DECS:-0}" -gt 0 ]]; then + DOC_AWARE=1 + fi +fi +``` + +**Why `total_terms > 0` rather than `[[ -f GLOSSARY.md ]]`:** `flowctl glossary remove` leaves a `# Glossary` H1 husk on disk after the last term is removed (the file is project state, intentionally retained). A presence-only check would false-positive on an empty husk and surface phantom doc-aware questions when no canonical vocabulary is actually defined. `glossary list --json` walks the file and counts populated entries; `total_terms == 0` for a husk. + +When `DOC_AWARE=1`, the four behaviors below layer onto the standard interview workflow. When `DOC_AWARE=0`, the interview proceeds exactly as today. + ## Detect Input Type 1. 
**Flow epic ID pattern**: matches `fn-\d+(-[a-z0-9-]+)?` (e.g., fn-1-add-oauth, fn-12, fn-2-fix-login-bug) @@ -137,10 +184,145 @@ Example flow: Before every question, classify it via the [questions.md](questions.md) **Pre-Question Taxonomy**: - **Codebase-answerable** ("what exists / how it's wired / what conventions live here") → use Read / Grep / Glob to answer; log to spec's `## Resolved via Codebase` section with file:line evidence. +- **Glossary-lookup-answerable** (`DOC_AWARE=1` only) — terms with a canonical entry in the nearest-ancestor `GLOSSARY.md` → silently resolve from the entry; log to spec's `## Glossary Conflicts` section only when the user's wording diverges from canonical AND the term is load-bearing (see behavior (a) below). - **User-judgment-required** ("what should exist / what tradeoff to make / what priority") → ask via `AskUserQuestion`. If you find yourself answering a "should" question via grep, that's the bug. Stop and ask the user. +#### Code-versus-assertion contradiction (`DOC_AWARE=1` — behavior (c)) + +When grep / Read reveals the code disagrees with something the user asserted ("we already have X at path Y" but Y is gone, or "the auth flow uses OAuth" but the code uses API keys), do **not** silently log under `## Resolved via Codebase`. Surface the contradiction as an `AskUserQuestion`: + +- **header**: `Code mismatch?` +- **body**: `Code shows <X> at <file:line>; you said <Y>. Recommended: <treat-code-as-source-of-truth | update-spec-to-match-code | revisit-the-area>. Confidence: [<tier>].` +- **options**: frozen — `match-code` (revise spec to align with what's there), `update-code` (treat the assertion as the goal; flag the divergence as a task), `clarify` (user explains; agent re-investigates with new context). 
+ +Confidence tier: `[high]` when grep evidence is unambiguous (file does not exist, function signature is clearly different); `[judgment-call]` when interpretation is at play (similar names, partial overlap, recent rename). Never silently pick a side — the user owns the resolution. + +The bar for surfacing: a meaningful contradiction that affects spec correctness. If the user says "the validator returns boolean" and grep shows it returns `Result<bool, Error>`, surface. If the user paraphrases a function's role and grep shows the role matches but the implementation differs in unrelated detail, log under `## Resolved via Codebase` and move on. + +## Doc-aware behaviors (`DOC_AWARE=1` only) + +When `DOC_AWARE=1`, four behaviors layer onto the standard interview workflow. When `DOC_AWARE=0`, skip this entire section. + +### Behavior (a) — Phase-zero glossary scan + +Before drafting the first question batch, run a glossary scan against the user's request. + +```bash +"$FLOWCTL" glossary list --json +``` + +JSON shape: + +```json +{ + "groups": [ + { + "path": "GLOSSARY.md", + "entries": [ + { "term": "Worker", "definition": "...", "avoid": ["consumer"], "relates_to": ["Queue"] } + ], + "count": 1 + } + ], + "file_count": 1, + "total_terms": 1 +} +``` + +For each defined term across `groups[].entries`, scan the user's request for occurrences. Term match is **case-insensitive whitespace-collapsed** — the same rule as `flowctl glossary read` (see `_glossary_term_matches` in `flowctl.py:401`). Do NOT reinvent matching logic; the canonical contract is "lowercase both sides, collapse runs of whitespace to single space, compare equal." Alias hits via `entries[].avoid`: if the user wrote `consumer` and the entry's `avoid` list contains `consumer`, that's a canonical-mismatch hit on `Worker`. 
+ +For each hit, evaluate one filter before surfacing: + +- **Is the term load-bearing for this spec?** Casual passing mention does not trigger; mention that defines behavior or shapes acceptance does. The user wrote "the worker fetches the queue" mid-sentence about deployment — passing mention, no question. The user wrote "we need a new kind of worker that processes batches" — load-bearing, surface. + +When a hit passes the load-bearing filter AND the user's wording conflicts with canonical (alias used instead of canonical, or definition contradicts), surface as the **first interview question** via `AskUserQuestion`: + +- **header**: `Term mismatch?` +- **body**: `You used "<user-wording>"; GLOSSARY.md defines "<canonical>" as "<one-line definition>". Recommended: <use-canonical | redefine | this-is-different>. Confidence: [<tier>].` +- **options**: frozen — `use-canonical` (the user meant the existing term; spec uses canonical wording), `redefine` (user is updating the term meaning; spec proceeds with new wording, agent will re-write `GLOSSARY.md` via `flowctl glossary add` after the interview), `this-is-different` (the words collide but the concepts differ; spec uses a fresh disambiguating term — capture in `## Glossary Conflicts`). + +Confidence tier: `[high]` when the canonical entry is recent and the user's wording cleanly maps to an `avoid` alias; `[judgment-call]` when meaning could plausibly have drifted; `[your-call]` when the term sits in user-domain territory the agent has no purchase on. + +**Throttle:** at most one Phase-zero glossary question per interview turn. If multiple terms hit, surface the most load-bearing one first; the rest fold into the natural conversation flow as they come up. Bombarding the user with vocabulary questions before the core spec questions is the failure mode this filter prevents. 
+ +### Behavior (b) — Fuzzy-term sharpening + +Across the conversation, watch for overloaded language — words the user keeps using whose meaning could plausibly shift between turns ("workflow", "session", "task" when a Flow `task` already has meaning, etc.). When you spot one: + +1. Propose a canonical via `AskUserQuestion`: + - **header**: `Sharpen "<term>"?` + - **body**: `You've used "<term>" in <count> turns. I'm reading it as "<agent's working definition>" but want to lock it in. Recommended: <X> — <one-sentence rationale>. Confidence: [<tier>].` + - **options**: 2-4 candidate canonical wordings + `none-of-these` (user provides their own). + +2. On user-pick, build the resolved entry and write it to the nearest-ancestor `GLOSSARY.md` via `flowctl glossary add`: + + ```bash + "$FLOWCTL" glossary add "<term>" --definition-file - --json <<EOF + <user-resolved one-line or short paragraph definition> + EOF + ``` + + Use `--definition-file -` (stdin) so multi-sentence definitions and quoted phrasing round-trip cleanly. `glossary add` is upsert — case-insensitive match replaces the existing entry in full; new terms append at the end of the file. If the user picked `redefine` in behavior (a), this is the same call site (one path, one upsert). + +3. The next question can re-read the glossary. There is no in-memory cache to invalidate — re-read on every doc-aware turn that needs canonical lookup. The cost is one stat + one file read per turn; sub-millisecond at typical sizes. + +**When to skip behavior (b):** if a term is single-use, or if the user volunteered a clear definition the first time they used it, or if the conversation is short enough (≤6 turns) that consolidation buys nothing yet. The behavior triggers when overloading is real and persistent, not on every undefined word. 
+ +### Behavior (d) — Decision-record write (three-criteria gate) + +When the interview surfaces a choice the user is making — not just a fact about the system, a real **decision** — evaluate the three-criteria gate before drafting a memory entry. + +**The three-criteria gate** (all three must hold): + +1. **Hard-to-reverse** — undoing this later costs more than redoing it now. Schema choices, public API shapes, integration boundaries qualify; cosmetic preferences and easily-toggled flags do not. +2. **Surprising-without-context** — a future maintainer reading the result without history would ask "why this and not the obvious thing?". Anything that follows the standard pattern of the surrounding code is not surprising. +3. **Real trade-off** — there was a genuine alternative that lost. If there was no real alternative, it isn't a decision; it's a fact. + +If any of the three fails, do NOT write a decision entry. Note the choice in the spec's prose body (e.g. `## Decision Context`) and move on. The bar exists because the decisions store decays fast when filled with non-decisions. + +When all three hold: + +1. **Draft the entry** in agent memory (do not write yet). Shape: + - **Title** (1 line, ≤80 chars): the decision in noun-phrase form (e.g. "Nearest-ancestor walk for glossary lookup"). + - **Body** (1-3 sentences floor; longer when warranted): + - 1 sentence on what was chosen. + - 0-1 sentences on why. + - Optional `## Considered Options` block listing rejected alternatives with one-line reasons each. + - Optional `## Consequences` block listing what this commits the project to. + - **Module** (optional): the file or subsystem the decision shapes. + - **Tags** (optional): comma-separated, e.g. `glossary,resolution,walk`. + +2. **Show the draft via `AskUserQuestion` before writing** — same pattern as `/flow-next:capture` Phase 4 read-back: + - **header**: `Write decision?` + - **body**: `Drafted decision entry: <title>. Body: <one-line summary>. 
Recommended: approve — <one-sentence rationale why all three gate criteria hold>. Confidence: [<tier>].` + - **options**: frozen — `approve` (write), `edit` (user revises title / body / module / tags via follow-up), `skip` (do not write; the choice stays in spec prose only). + + Show the full body inline in the question or in the message preceding it; the user must be able to read what they're approving. Never write silently — even when the gate cleanly passes, the user owns the final write. + +3. **On `approve`**, call: + + ```bash + "$FLOWCTL" memory add \ + --track knowledge \ + --category decisions \ + --title "<title>" \ + --module "<module>" \ + --tags "<tags>" \ + --body-file - <<EOF + <body markdown> + EOF + ``` + + The `decisions` category is registered in flowctl's memory schema (Task 1 of this epic). Optional fields `--decision-status` (default `accepted`), `--superseded-by`, and `--alternatives-considered` are available; pass them when the conversation supplies them and skip otherwise. + +4. **On `edit`**, ask one follow-up `AskUserQuestion` for which field changes (title / body / module / tags), capture the revision, re-show the draft, loop. Hard cap at 2 edit cycles; after the second, re-show the draft with only `approve` / `skip` as options (the user still makes the final call). + +5. **On `skip`**, do nothing — the choice still appears in spec prose; only the memory entry is suppressed. + +**At most one decision write per interview turn.** Even if multiple gate-passing decisions surface, ask one at a time; subsequent asks adapt to the user's energy level for read-back. + ## Question Categories Read [questions.md](questions.md) for all question categories and interview guidelines. @@ -181,6 +363,10 @@ Decisions made during interview (e.g., "Use OAuth not SAML", "Support mobile + w (optional — omit if nothing was resolved this way during the interview) Items the agent answered via Read / Grep / Glob, with file:line evidence. Separate from items the user answered. Lets reviewers spot-check assumptions later. 
+## Glossary Conflicts +(optional — only when DOC_AWARE=1 surfaced behavior-(a) hits during the interview) +Per-term: user-wording vs. canonical term, the resolution chosen (use-canonical / redefine / this-is-different), file:line of the canonical entry. Lets reviewers see where vocabulary tightened. + ## Open Questions Unresolved items that need research during planning @@ -221,6 +407,10 @@ Decisions made during interview (optional — omit if nothing was resolved this way during the interview) Items the agent answered via Read / Grep / Glob, with file:line evidence. Separate from items the user answered. +## Glossary Conflicts +(optional — only when DOC_AWARE=1 surfaced behavior-(a) hits during the interview) +Per-term: user-wording vs. canonical term, the resolution chosen, file:line of the canonical entry. + ## Open Questions Unresolved items @@ -277,6 +467,7 @@ Show summary: - Number of questions asked - Key decisions captured - What was written (Flow ID updated / file rewritten) +- Doc-aware mode (when `DOC_AWARE=1` was active): glossary terms added/updated via `flowctl glossary add`, decision entries written via `flowctl memory add --track knowledge --category decisions`, glossary conflicts captured under `## Glossary Conflicts` Suggest next step based on input type: - New idea / epic without tasks → `/flow-next:plan fn-N` diff --git a/plugins/flow-next/skills/flow-next-interview/questions.md b/plugins/flow-next/skills/flow-next-interview/questions.md index 56285d99..e6e799da 100644 --- a/plugins/flow-next/skills/flow-next-interview/questions.md +++ b/plugins/flow-next/skills/flow-next-interview/questions.md @@ -4,21 +4,27 @@ Ask NON-OBVIOUS questions only. Expect 40+ questions for complex specs. 
## Pre-Question Taxonomy -Before asking any question, classify it: +Before asking any question, classify it into one of three categories: | Category | Who answers | Examples | |----------|-------------|----------| | **Codebase-answerable** | Agent (Read / Grep / Glob) | "What persistence layer is used?" / "Where do existing routes live?" / "What's the test framework?" | +| **Glossary-lookup-answerable** (`DOC_AWARE=1` only) | Agent (`flowctl glossary read`) | "What does this project mean by 'worker'?" / "Is 'session' the canonical term here, or is it 'connection'?" | | **User-judgment-required** | User (`AskUserQuestion`) | "Should we add caching?" / "What's the priority for offline support?" / "Is performance or simplicity more important here?" | -**Rule of thumb:** +**Rules of thumb:** -- "What exists / how is it wired / what conventions live here" → agent investigates, doesn't ask. +- "What exists / how is it wired / what conventions live here" → agent investigates the codebase, doesn't ask. +- "What does the project's canonical vocabulary call this?" → agent looks up the nearest-ancestor `GLOSSARY.md` (when `DOC_AWARE=1`) and surfaces the term to the user only when (i) no canonical entry exists and the term is overloaded (behavior (b) — fuzzy-term sharpening), or (ii) the user's wording conflicts with the canonical term AND the term is load-bearing (behavior (a) — phase-zero scan). - "What should exist / what tradeoff to make / what priority" → user decides, agent asks. **If you find yourself answering a "should" question via grep, that's the bug.** Stop and ask the user. -**Audit trail:** every question the agent answered via codebase exploration goes into the spec's `## Resolved via Codebase` section (separate from items the user answered). Cite file:line evidence so reviewers can spot-check assumptions later — especially important when the agent's "I checked" turns out to be "I assumed." +**Audit trail:** + +- Codebase-resolved items → `## Resolved via Codebase` section with file:line evidence. 
+- Glossary-conflict-resolved items (when behavior (a) fired) → `## Glossary Conflicts` section with the user-wording, canonical term, and resolution. +- Both sections are separate from items the user answered. Cite evidence so reviewers can spot-check assumptions later — especially important when the agent's "I checked" turns out to be "I assumed." ## Technical Implementation diff --git a/plugins/flow-next/skills/flow-next-sync/SKILL.md b/plugins/flow-next/skills/flow-next-sync/SKILL.md index 1cf2cd32..5a97c201 100644 --- a/plugins/flow-next/skills/flow-next-sync/SKILL.md +++ b/plugins/flow-next/skills/flow-next-sync/SKILL.md @@ -94,7 +94,24 @@ No downstream tasks to sync (all done or none exist). ``` Stop here (success, nothing to do). -### Step 5: Spawn Plan-Sync Agent +### Step 5: Gather glossary + decisions context + +Two extra context types help the agent catch drift the spec text alone can't reveal: project-glossary terms (renames where the old spec used a term whose `_Avoid_` alias now appears in code) and active decision constraints (current code may touch files mentioned in a decision's `Consequences` section). + +```bash +GLOSSARY_JSON="$("$FLOWCTL" glossary list --json 2>/dev/null \ + || echo '{"groups":[],"file_count":0,"total_terms":0}')" +DECISIONS_JSON="$("$FLOWCTL" memory list --track knowledge --category decisions --json 2>/dev/null \ + || echo '{"entries":[],"legacy":[],"count":0,"status":"active"}')" +``` + +Both calls are best-effort — when flowctl fails (non-zero exit), the `|| echo` fallback supplies an empty default so the agent prompt stays valid. + +When `GLOSSARY_JSON` reports `file_count == 0` AND `DECISIONS_JSON` reports `count == 0`, there is no extra context to add — pass the empty defaults through unchanged (the agent treats them as a no-op signal). + +When `GLOSSARY_JSON.total_terms == 0` but `file_count > 0`, every group is a husk. Husks carry no signal for drift detection — pass the JSON through untouched and let the agent skip them. 
+ +### Step 6: Spawn Plan-Sync Agent Build context and spawn via Task tool: @@ -107,16 +124,19 @@ EPIC_ID: <epic id> DOWNSTREAM_TASK_IDS: <comma-separated list from step 4> DRY_RUN: <true|false> +GLOSSARY_JSON: <output of `flowctl glossary list --json` from step 5> +DECISIONS_JSON: <output of `flowctl memory list --track knowledge --category decisions --json` from step 5> + <if DRY_RUN is true> DRY RUN MODE: Report what would change but do NOT use Edit tool. Only analyze and report drift. </if> ``` -Use Task tool with `subagent_type: flow-next:plan-sync` +Use Task tool with `subagent_type: flow-next:plan-sync` (sync-codex.sh rewrites `Task` to `spawn_agent` for the Codex mirror). **Note:** `COMPLETED_TASK_ID` is always provided - for task-mode it's the input task, for epic-mode it's the source task selected in Step 4. -### Step 6: Report Results +### Step 7: Report Results After agent returns, format output: diff --git a/plugins/flow-next/templates/memory/README.md.tpl b/plugins/flow-next/templates/memory/README.md.tpl index 2c5595c8..e344f467 100644 --- a/plugins/flow-next/templates/memory/README.md.tpl +++ b/plugins/flow-next/templates/memory/README.md.tpl @@ -21,6 +21,7 @@ Structure: tooling-decisions/ workflow/ best-practices/ + decisions/ ``` Each entry is a markdown file with YAML frontmatter. Filename convention: @@ -37,6 +38,7 @@ Each entry is a markdown file with YAML frontmatter. Filename convention: Required (all tracks): `title`, `date`, `track`, `category`. Bug track adds: `problem_type`, `symptoms`, `root_cause`, `resolution_type`. Knowledge track adds: `applies_when`. +Decisions category (knowledge) adds optional: `decision_status` (proposed | accepted | superseded), `superseded_by`, `alternatives_considered`. Optional: `module`, `tags`, `status`, `stale_reason`, `stale_date`, `last_updated`, `related_to`. 
## Commands diff --git a/scripts/sync-codex.sh b/scripts/sync-codex.sh index bf1c7ca1..f949ea52 100755 --- a/scripts/sync-codex.sh +++ b/scripts/sync-codex.sh @@ -771,6 +771,27 @@ else echo -e " ${GREEN}✓${NC} No Claude-native tool refs in Codex skill prose" fi +# R17 mirror scan — DDD vocabulary guard for the Codex mirror (fn-38 task 7). +# Canonical clean + mechanical rewrite should keep mirror clean, but a derived +# artifact deserves its own validation. Pattern strings are the authoritative +# forbidden list — see CLAUDE.md / fn-38 spec for rationale. +ddd_refs=$( { grep -rE 'ubiquitous language|bounded context|domain expert|aggregate root' "$CODEX_DIR/skills/" "$CODEX_DIR/agents/" 2>/dev/null || true; } | { grep -v '/templates/' || true; } | wc -l | tr -d ' ') +if [ "$ddd_refs" != "0" ]; then + echo -e " ${RED}✗${NC} $ddd_refs R17 forbidden-vocabulary refs in codex mirror — clean canonical first, then re-run sync" + errors=$((errors + 1)) +else + echo -e " ${GREEN}✓${NC} No R17 forbidden vocabulary in Codex mirror" +fi + +# R4 mirror scan — no early-design meta-file references leaked into mirror. +meta_refs=$( { grep -rE 'GLOSSARY-MAP\.md|CONTEXT-MAP\.md' "$CODEX_DIR/skills/" "$CODEX_DIR/agents/" 2>/dev/null || true; } | { grep -v '/templates/' || true; } | wc -l | tr -d ' ') +if [ "$meta_refs" != "0" ]; then + echo -e " ${RED}✗${NC} $meta_refs R4 meta-file refs in codex mirror — clean canonical first, then re-run sync" + errors=$((errors + 1)) +else + echo -e " ${GREEN}✓${NC} No R4 meta-file refs in Codex mirror" +fi + # Validate openai.yaml files — every skill in REQUIRED_OPENAI_YAML_SKILLS # MUST have one. Missing entries fail CI. Extras are fine (utility skills # may opt in later).