diff --git a/.github/workflows/capture-learnings.yaml b/.github/workflows/capture-learnings.yaml index 73930507a..09f4e4330 100644 --- a/.github/workflows/capture-learnings.yaml +++ b/.github/workflows/capture-learnings.yaml @@ -111,6 +111,10 @@ jobs: about what was wrong and how it was fixed. It has been privacy-scanned; use it as given. May be empty for some candidates. - signals: { titleTokens: string[], labels: string[] } — secondary context only + - ciFix: object (optional) — present when this PR ALSO failed then fixed CI + before merge. When present it carries { failingCheckName, diffExcerpt, + logExcerpt? } with the same meaning as the ci-fail-then-pass fields below. It + has been privacy-scanned independently; use it as given. trigger: "ci-fail-then-pass" candidates have: - mergeSha: opaque merge commit SHA (the ONLY identifier you receive) @@ -133,6 +137,11 @@ jobs: prose, not in the title tokens. When reviewExcerpts is empty, fall back to signals but keep the body modest and clearly general rather than inventing specifics. + - When `ciFix` is ALSO present (the PR drew review AND failed then fixed CI), + distill from BOTH: what the reviewers flagged AND what the failing check caught + and how the fixing diff corrected it. These dual-signal PRs are the richest + source — weave the review feedback and the failure→fix into one learning, not + two stapled together. For trigger: "ci-fail-then-pass" — - Distills the learning as "this failure → this fix": what the failing check @@ -210,6 +219,7 @@ jobs: echo "| After seen dedup | $(jq '.telemetry.afterSeenDedup // 0' "$harvest") |" echo "| After solutions dedup | $(jq '.telemetry.afterSolutionsDedup // 0' "$harvest") |" echo "| Candidates emitted | $(jq '.telemetry.emitted // 0' "$harvest") |" + echo "| Dual-trigger candidates | $(jq '.telemetry.dualTriggerCandidates // 0' "$harvest") |" echo "| Enrichment blocked by privacy | $(jq '.telemetry.enrichmentBlocked // 0' "$harvest") |" echo "| Enrichment blocked by secret | $(jq '.telemetry.enrichmentBlockedBySecret // 0' "$harvest") |" else diff --git a/.gitignore b/.gitignore index 39e78a5f8..03c70864f 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ coverage # Local agent artifacts .context/ +.review/ # Wiki lint local artifacts (CI uploads them to actions/upload-artifact; never tracked in git) wiki-lint-report.md diff --git a/docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md b/docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md new file mode 100644 index 000000000..752e966fc --- /dev/null +++ b/docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md @@ -0,0 +1,135 @@ +--- +title: 'Merged-candidate evidence for dual-trigger PRs' +date: 2026-06-23 +status: ready +scope: standard +kind: requirements +parent: docs/brainstorms/2026-06-22-c2-failed-then-fixed-capture-requirements.md +--- + +# Merged-candidate evidence for dual-trigger PRs + +## Purpose + +Fix a delivery bug exposed by live validation of the failed-then-fixed (ci-fix) capture +trigger: a PR that matched both triggers loses its ci-fix evidence before it can be authored. +Attach the ci-fix evidence to the review candidate instead of dropping it, so the +most-informative PRs — those that both drew review rounds and broke then fixed CI — yield one +proposal grounded in both signals. + +## The vision in one sentence + +When a PR both drew substantive review and failed-then-fixed CI, Fro Bot learns from the whole +story — what the reviewers flagged and what broke and got fixed — in a single proposal. + +## Problem frame + +Live validation proved the ci-fix trigger detects correctly (real candidates, real fixing +diffs, privacy gate clean) but never authored a proposal. Root cause: `buildCandidateDigest` +runs within-run dedup (one candidate per merge SHA, review-heavy precedence) *before* the cap +floor. In this repo nearly every substantive PR gets ≥2 Fro Bot review rounds, so a +failed-then-fixed PR is almost always also review-heavy; within-run dedup collapses it to +review-heavy and discards the ci-fix evidence, leaving the floor nothing to reserve. The two +mechanisms — R4 within-run dedup (drop, review-heavy wins) and the ci-fix cap floor — fight +each other. + +The fix reframes the conflict: a PR matching both triggers is not a tiebreak to arbitrate, it +is the **richest** learning source. Attach the ci-fix evidence to the surviving review +candidate instead of dropping it, so one proposal carries both signals. + +## Requirements + +- R1. Keep the discriminated `Candidate` union (`ReviewCandidate | CiFixCandidate`) — `trigger` + stays the branch key, preserving existing narrowing, privacy branches, emitters, and tests. + Add one optional field to `ReviewCandidate`: `ciFix?` (`failingCheckName`, `diffExcerpt`, + `logExcerpt?`). A `ReviewCandidate` may now carry attached ci-fix evidence; `CiFixCandidate` + remains for pure-ci-fix PRs (failed CI, no substantive review). This lower-churn shape was + chosen over collapsing the union into optional-everywhere fields, which is more refactor than + the bug needs. +- R2. Within-run collapse for a same-SHA dual-trigger PR becomes an **attach**, not a drop: the + `ReviewCandidate` survives (review prose is the richer base) and the `CiFixCandidate`'s + evidence is attached to its `ciFix` field. Pure single-trigger PRs are unchanged. One proposal + per merge SHA still holds (parent doc R4) — now via attach, not drop, so ci-fix evidence is + no longer discarded. +- R3. The upstream privacy scan runs **per evidence type, independently and fail-closed**, on + the **final merged candidate** (after the same-SHA merge, never on pre-merge fragments — merged + evidence must not bypass the scan). The scan iterates **every populated evidence field every + time**, not only the field where a hit was found: each present evidence type is redacted for + structural secrets first, then dropped if a private-repo-name or residual hard-secret survives. + A hit in one evidence type drops only that type; clean evidence of the other survives. Because + every populated type is always scanned, a private identifier appearing in both diff and review + prose is caught in both. No unscanned evidence of any type reaches the agent. +- R3a. **Serialization allowlist.** Only the allowlisted evidence/identifier fields may survive + into the digest the agent sees and the public issue. Absent evidence fields carry no metadata + forward; no raw PR object, owner/repo/number/title, or stale fragment may ride along on the + candidate. The authored body is built only from already-scanned evidence, and the open step's + body scan remains the final backstop. +- R4. The agent prompt has one branch that describes whatever evidence is present and instructs + the agent to distill from both when both exist (review prose + the failure→fix story). +- R5. The cap floor (`selectWithCiFixFloor`) stays, retargeted to reserve slots for candidates + that **carry ci-fix evidence** — a pure `CiFixCandidate` OR a `ReviewCandidate` with a `ciFix` + field attached (a `hasCiFixEvidence(candidate)` helper) — so ci-fix learnings are guaranteed + authoring slots regardless of the review-heavy backlog. The floor runs on the merged candidate set + **before** the privacy scan drops any evidence, so a candidate's floor eligibility is decided + by the ci-fix evidence it was harvested with, not by whether that evidence later survived + redaction. (A candidate selected by the floor whose ci-fix evidence is then dropped on a + privacy hit still proceeds — title-only or on its surviving review evidence — exactly as a + privacy hit behaves today.) +- R6. Telemetry stays counts-only: per-evidence-type blocked counters and a count of merged + (dual-trigger) candidates. "Merged" is counted by candidates carrying both evidence types; + review/ci-fix membership derives from which evidence fields are present (no separate + double-counted trigger tally). + +## Scope boundaries + +- Keep the discriminated union; attach `ciFix` evidence to the review candidate on a same-SHA + dual-trigger collapse. +- No change to the propose-only model, dedup-by-merge-SHA (still one proposal per SHA), the + per-run cap, the secret-scan pattern set, or the seen-set / solutions dedup behavior. +- Pure-review and pure-ci-fix PRs still produce single-evidence candidates — the merge only + changes the overlap case. + +### Deferred + +- C3 (issue triage), C4 (cross-run) — unchanged from the parent, still deferred. +- Any change to per-trigger cap budgeting beyond the existing floor. + +## Open questions (for planning) + +- **Q2 — Merge mechanics + ordering.** Where the same-SHA merge runs (it must run first, before + seen-set and solutions dedup, so later stages see the merged candidate), how freshness order + is preserved for the merged record, and the field-collision rules: `signals` when the two + records differ (review vs ci-fix harvested distinct token sets), and how a merged record's + evidence is assembled when one side was already empty. +- **Q5 — Prompt evidence presentation.** How the prompt presents both evidence sets so the + agent distills a unified learning rather than two stapled-together ones. + +(Resolved by the requirements, not open: the union is KEPT, with `ciFix?` attached to +`ReviewCandidate` (R1) — not a union collapse; the floor keys on `hasCiFixEvidence` covering +both the attached and pure cases (R5); the scan runs on the final candidate over every populated +evidence field incl. an attached `ciFix` (R3). Refactor posture: characterization-first to lock +current single-evidence behavior before adding the attach path.) + +## Success criteria + +- **SC1** — A PR that matched both triggers yields ONE candidate carrying both `reviewExcerpts` + and `ciFix` evidence (not collapsed to one source). +- **SC2** — A private-name or secret in the diff drops only the ci-fix evidence; clean review + prose on the same candidate survives (and vice versa). Mutation-proven per evidence type. +- **SC3** — The cap floor reserves a slot for a candidate carrying ci-fix evidence even when it + also carries review evidence and review-heavy candidates fill the rest. +- **SC4** — A live capture run authors a proposal grounded in ci-fix evidence (the bug this doc + fixes): `emitted` includes a candidate with ci-fix evidence, and the opened proposal + references the fixing diff. +- **SC5** — Pure-review and pure-ci-fix candidates still behave as before (characterization). + +## Sources & references + +- Parent: docs/brainstorms/2026-06-22-c2-failed-then-fixed-capture-requirements.md +- Plan: docs/plans/2026-06-22-004-feat-c2-failed-then-fixed-capture-plan.md +- The validation finding: capture run 28069117571 emitted 0 ci-fix candidates despite + detecting 3, because within-run dedup ran before the floor (memory of the diagnosis). +- Code: scripts/capture-learnings-harvest.ts (`Candidate` union, `buildCandidateDigest` + within-run dedup + `selectWithCiFixFloor`, `applyEnrichmentScanAvailability`), + scripts/capture-learnings-privacy.ts (the shared gate), scripts/capture-learnings-open.ts + (title/marker), .github/workflows/capture-learnings.yaml (the prompt) diff --git a/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md b/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md new file mode 100644 index 000000000..6eef16832 --- /dev/null +++ b/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md @@ -0,0 +1,283 @@ +--- +title: 'fix: attach ci-fix evidence to dual-trigger candidates' +type: fix +status: active +date: 2026-06-23 +origin: docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md +--- + +# fix: attach ci-fix evidence to dual-trigger candidates + +## Overview + +A PR that matched both capture triggers loses its ci-fix evidence: within-run dedup runs before +the cap floor and drops the ci-fix candidate (review-heavy wins), so the floor has nothing to +reserve and a failed-then-fixed learning is never authored. Fix it by attaching the ci-fix +evidence to the surviving review candidate instead of dropping it, retargeting the floor to +"carries ci-fix evidence", and scanning every populated evidence field on the final candidate. + +## Problem Frame + +Live validation (capture run that examined 106 merged PRs) detected 3 ci-fix candidates yet +emitted 0: in this repo nearly every substantive PR also gets ≥2 review rounds, so the +review-heavy precedence in within-run dedup collapses every dual-trigger PR to review-heavy +before `selectWithCiFixFloor` runs. The dedup and the floor fight each other. The dual-trigger +PR is the richest learning source, so attach both evidence sets rather than arbitrate +(see origin). + +## Requirements Trace + +- R1. Keep the discriminated union; add optional `ciFix?` evidence to `ReviewCandidate` (origin R1). +- R2. Within-run collapse attaches the ci-fix evidence to the surviving review candidate instead + of dropping it; one proposal per merge SHA still holds (origin R2). +- R3. The privacy scan runs on the final candidate, over every populated evidence field + (`reviewExcerpts` and an attached `ciFix`), each redacted-then-dropped-on-residual + independently (origin R3). +- R3a. Serialization allowlist: only allowlisted evidence/identifier fields reach the digest and + public issue; no raw PR object or stale fragment rides along (origin R3a). +- R4. The agent prompt distills from both evidence sets when both are present (origin R4). +- R5. `selectWithCiFixFloor` reserves slots for candidates that carry ci-fix evidence — a pure + `CiFixCandidate` or a `ReviewCandidate` with `ciFix` attached, via `hasCiFixEvidence` (origin R5). +- R6. Counts-only telemetry: per-evidence-type blocked counters and a dual-trigger (merged) count + (origin R6). + +## Scope Boundaries + +- Keep the discriminated union; attach `ciFix` to `ReviewCandidate` on same-SHA dual-trigger collapse. +- No change to the propose-only model, dedup-by-merge-SHA, the per-run cap, the secret-scan + pattern set, or the seen-set / solutions dedup. +- Pure-review and pure-ci-fix PRs are unchanged. + +### Deferred to Separate Tasks + +- C3 (issue triage), C4 (cross-run) — unchanged, still deferred. +- Collapsing the union into a fully unified optional-fields candidate — rejected as more churn + than the bug needs (origin). + +## Context & Research + +### Relevant Code and Patterns + +- `scripts/capture-learnings-harvest.ts`: + - `Candidate = ReviewCandidate | CiFixCandidate` — add optional `ciFix?` to `ReviewCandidate`. + - `buildCandidateDigest` within-run dedup (~line 361-367): the `byMergeSha` Map keeps one object + per SHA with review-heavy precedence — change to attach the `CiFixCandidate`'s evidence onto + the surviving `ReviewCandidate`. + - The privacy branches (~line 385-413 ReviewCandidate, ~line 385-413 CiFixCandidate): a + `ReviewCandidate` with `ciFix` must now scan BOTH `reviewExcerpts` and the attached `ciFix` + evidence, each independently, on the final candidate. + - `selectWithCiFixFloor` (~line 333): change the predicate from `trigger === 'ci-fail-then-pass'` + to a `hasCiFixEvidence(candidate)` helper covering the attached and pure cases. + - `applyEnrichmentScanAvailability`: when the scan is unavailable, also clear an attached `ciFix` + on a `ReviewCandidate` (today it clears `reviewExcerpts` for review and diff/log for ci-fix). +- `scripts/capture-learnings-open.ts`: title/marker work off `mergeSha` — unaffected by the attach. +- `.github/workflows/capture-learnings.yaml`: the agent prompt — the review branch must mention + the optional attached ci-fix evidence. + +### Institutional Learnings + +- `docs/solutions/best-practices/pure-core-privacy-gates-shared-module-2026-06-22.md` — the scan + stays pure-core, mutation-proven; scanning the final candidate over all fields preserves the + two-chokepoint fail-closed property. +- `docs/solutions/security-issues/verify-whole-public-perimeter-2026-06-22.md` — the attached + evidence is a new field on the public-facing candidate; the allowlist (R3a) keeps the perimeter closed. + +## Key Technical Decisions + +- **Attach, don't collapse.** Keep `trigger` as the branch key; add `ciFix?` to `ReviewCandidate`. + Preserves existing narrowing, privacy branches, emitters, and most tests. Lower churn than a + unified optional-fields candidate, same outcome. +- **Attach runs in within-run dedup, before seen/solutions dedup and the floor**, so every later + stage sees the merged candidate. Freshness order preserved (the review candidate keeps its + position). +- **`hasCiFixEvidence(candidate)` helper** — true for a `CiFixCandidate` or a `ReviewCandidate` + with a non-empty `ciFix`. Used by the floor; defines "carries ci-fix evidence" in one place. +- **Scan all populated evidence on the final candidate.** A `ReviewCandidate` with `ciFix` scans + `reviewExcerpts` AND the attached diff/log independently; a hit in one drops only that one. The + floor decides eligibility on harvested evidence, before any privacy drop (origin R5). + +## Open Questions + +### Resolved During Planning + +- Shape → attach `ciFix?` to `ReviewCandidate`, keep the union (origin). +- Privacy on the merged candidate → scan every populated field on the final candidate, per-type + independent drop (origin R3). +- Floor predicate → `hasCiFixEvidence` (origin R5). + +### Deferred to Implementation + +- The exact `ciFix` field name/shape on `ReviewCandidate` (reuse the `CiFixCandidate` evidence + fields verbatim vs a nested object) — pick the minimal-diff form during implementation. +- Whether `applyEnrichmentScanAvailability` needs a new branch or a tweak to the existing review + branch to also clear an attached `ciFix`. +- The exact telemetry field name for the dual-trigger count. + +## High-Level Technical Design + +> *Directional guidance for review, not implementation specification.* + +``` +within-run dedup (buildCandidateDigest): + for each candidate by mergeSha: + same SHA, one ReviewCandidate + one CiFixCandidate + → keep the ReviewCandidate, set reviewCandidate.ciFix = ciFixCandidate's evidence (ATTACH) + same SHA, two of the same trigger → existing behavior + unique SHA → unchanged + ↓ (seen-set dedup, solutions dedup — unchanged) +selectWithCiFixFloor(ordered, cap, floor): + reserve = filter(hasCiFixEvidence) // was: trigger === 'ci-fail-then-pass' + ...unchanged... + ↓ +privacy scan (per candidate, final): + if ReviewCandidate: + scan reviewExcerpts (redact → drop-on-residual) // existing + if candidate.ciFix: scan ciFix.diff + ciFix.log (independently, same way) // NEW + if CiFixCandidate: scan ciFix evidence // existing + ↓ opaque digest (allowlisted fields only) +agent prompt: review branch notes the optional attached failure→fix evidence +``` + +## Implementation Units + +- [x] **Unit 1: Attach ci-fix evidence in within-run dedup + `hasCiFixEvidence` + floor predicate** + +**Goal:** A same-SHA dual-trigger PR keeps its ci-fix evidence (attached to the review candidate), +and the floor reserves slots for any candidate carrying ci-fix evidence. + +**Requirements:** R1, R2, R5. + +**Dependencies:** None. + +**Files:** +- Modify: `scripts/capture-learnings-harvest.ts` +- Test: `scripts/capture-learnings-harvest.test.ts` + +**Approach:** +- Add optional `ciFix?` (the `CiFixCandidate` evidence: `failingCheckName`, `diffExcerpt`, + `logExcerpt?`) to `ReviewCandidate`. Change the within-run dedup so when a SHA has both a review + and a ci-fix record, the review candidate survives with `ciFix` attached. Add + `hasCiFixEvidence(candidate)` and use it in `selectWithCiFixFloor` instead of the trigger check. + +**Execution note:** Characterization-first — lock the current single-trigger dedup + floor behavior +before changing it. + +**Patterns to follow:** the existing `byMergeSha` dedup, `selectWithCiFixFloor`, the `CiFixCandidate` +evidence fields. + +**Test scenarios:** +- Characterization: a pure-review PR and a pure-ci-fix PR (distinct SHAs) still produce their + current single-evidence candidates; the floor still reserves the pure-ci-fix one. +- Happy: a same-SHA review+ci-fix pair → one `ReviewCandidate` with `ciFix` attached (the + regression for the live bug). Assert the ci-fix evidence is present on the survivor. +- Floor: a dual-trigger candidate (review + attached ci-fix) is reserved by the floor even when + review-heavy candidates fill the rest — the exact production starvation, now fixed. +- Edge: `hasCiFixEvidence` true for pure ci-fix and for review-with-ciFix, false for pure review. +- Edge: two review records same SHA, or two ci-fix records same SHA → existing dedup unchanged. + +**Verification:** gates green; the dual-trigger PR retains ci-fix evidence and the floor reserves it. + +- [x] **Unit 2: Scan attached ci-fix evidence in the privacy step** + +**Goal:** A `ReviewCandidate` with attached `ciFix` has BOTH its review prose and its diff/log +scanned independently, fail-closed, on the final candidate. + +**Requirements:** R3, R3a. + +**Dependencies:** Unit 1. + +**Files:** +- Modify: `scripts/capture-learnings-harvest.ts` +- Modify: `scripts/capture-learnings-harvest.test.ts` + +**Approach:** +- In `buildCandidateDigest`'s ReviewCandidate privacy branch, after scanning `reviewExcerpts`, also + scan an attached `ciFix` (redact structural secrets, then drop the attached `ciFix` on a residual + private-name or hard-secret hit) — independently from the review-prose result. Update + `applyEnrichmentScanAvailability` so an unavailable scan also clears an attached `ciFix`. Keep the + serialization allowlist: only allowlisted fields reach the digest. + +**Execution note:** Test-first; the cross-field privacy mutation proof is load-bearing. + +**Patterns to follow:** the existing ReviewCandidate and CiFixCandidate privacy branches (redact → +drop-on-residual), the truncate-then-scan ordering invariant. + +**Test scenarios:** +- Happy: a dual candidate with clean review prose + clean diff → both survive, redacted. +- Privacy (independent drop): a secret in the attached diff drops ONLY the `ciFix`; clean + `reviewExcerpts` survive. And the reverse (private name in review prose drops only review). +- Privacy (cross-field): a private name in BOTH review prose and the attached diff → caught in + both (every populated field scanned every time). Mutation-proven: bypassing the attached-ciFix + scan lets the diff secret reach the digest → test fails. +- Scan-unavailable: `applyEnrichmentScanAvailability(false)` clears reviewExcerpts AND attached ciFix. +- Allowlist: the emitted dual candidate carries only allowlisted keys (no owner/repo/number/title). + +**Verification:** gates green; no unscanned attached evidence reaches the digest; mutation proof bites. + +- [x] **Unit 3: Prompt + telemetry for dual-trigger candidates** + +**Goal:** The agent distills from both evidence sets when present; telemetry counts dual-trigger +candidates and per-evidence-type blocks. + +**Requirements:** R4, R6. + +**Dependencies:** Unit 2. + +**Files:** +- Modify: `.github/workflows/capture-learnings.yaml` +- Modify: `scripts/capture-learnings-harvest.ts` (telemetry counters) +- Modify: `scripts/capture-learnings-harvest.test.ts` + +**Approach:** +- Update the agent prompt's review branch to note that a review candidate may also carry the + failure→fix evidence (`ciFix`), and instruct distilling from both when present. Add a + dual-trigger (merged) count and ensure the per-evidence-type blocked counters + (`enrichmentBlocked`, `enrichmentBlockedBySecret`) cover the attached-ciFix drops. Surface the + dual-trigger count in the step summary. + +**Test scenarios:** +- Telemetry: a run with one dual-trigger candidate increments the merged count; a per-type drop + increments the right blocked counter. + +**Test expectation for the YAML prompt:** none — prompt wording, validated by actionlint + a manual +dispatch showing a dual-trigger proposal that cites the fixing diff. + +**Verification:** actionlint clean; a manual capture run authors a proposal grounded in both the +review prose and the fixing diff (the live proof the original bug is fixed). + +## System-Wide Impact + +- **Interaction graph:** the attach changes only the within-run dedup branch; seen/solutions dedup, + cap, floor (predicate swap), and privacy (added attached-field scan) are the touch points. The + open step and emitters are unaffected. +- **Error propagation:** privacy scan stays fail-closed per evidence field; an attached-ciFix hit + drops only that field. +- **State lifecycle risks:** none new — no persisted state; one proposal per merge SHA preserved. +- **API surface parity:** the shared privacy module is unchanged (the secret/redaction functions + already exist); both chokepoints still use it. +- **Unchanged invariants:** propose-only model, merge-SHA marker, `learning-proposal` label, the + per-run cap, opacity (allowlisted fields only), the discriminated union itself. + +## Risks & Dependencies + +| Risk | Mitigation | +|------|------------| +| Attached ci-fix evidence bypasses the scan | R3: scan the final candidate over every populated field; mutation-proven | +| Floor predicate misses attached evidence | `hasCiFixEvidence` covers both pure and attached; tested | +| Refactor breaks current single-trigger behavior | Characterization-first locks pure-review/pure-ci-fix before the change | +| Empty-but-present `ciFix` confuses `hasCiFixEvidence` | Define "carries" precisely (non-empty diff); test the empty/absent boundary | + +## Documentation / Operational Notes + +- After it lands, dispatch a capture run to confirm a dual-trigger proposal opens grounded in the + fixing diff — the live proof the delivery bug is fixed (SC4). + +## Sources & References + +- **Origin document:** docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md +- Parent: docs/brainstorms/2026-06-22-c2-failed-then-fixed-capture-requirements.md, + docs/plans/2026-06-22-004-feat-c2-failed-then-fixed-capture-plan.md +- Code: scripts/capture-learnings-harvest.ts, scripts/capture-learnings-privacy.ts, + scripts/capture-learnings-open.ts, .github/workflows/capture-learnings.yaml +- Learnings: docs/solutions/best-practices/pure-core-privacy-gates-shared-module-2026-06-22.md, + docs/solutions/security-issues/verify-whole-public-perimeter-2026-06-22.md diff --git a/scripts/capture-learnings-harvest.test.ts b/scripts/capture-learnings-harvest.test.ts index 109224346..1ae455912 100644 --- a/scripts/capture-learnings-harvest.test.ts +++ b/scripts/capture-learnings-harvest.test.ts @@ -21,6 +21,7 @@ import { FRO_BOT_REVIEWER_LOGINS, harvestCandidates, harvestCiFixCandidates, + hasCiFixEvidence, LEARNING_PROPOSAL_LABEL, MAX_EXCERPT_CHARS_PER_CANDIDATE, parseMergeShaMarker, @@ -191,18 +192,21 @@ describe('buildCandidateDigest', () => { }) describe('opacity guarantee', () => { - it('review-heavy candidate has ONLY trigger, mergeSha, reviewRounds, signals, reviewExcerpts — no owner/repo/number/title', () => { - // #given a review-heavy candidate + it('review-heavy candidate has ONLY allowed keys — no owner/repo/number/title', () => { + // #given a review-heavy candidate (pure review, no ci-fix) const input = makeDigestInput() // #when building the digest const result = buildCandidateDigest(input) - // #then each review-heavy candidate has exactly the allowed keys + // #then each review-heavy candidate has only the allowed keys + // ciFix is optional — present only when the PR also matched ci-fail-then-pass + const baseAllowedKeys = ['mergeSha', 'reviewExcerpts', 'reviewRounds', 'signals', 'trigger'].sort() + const allowedKeysWithCiFix = [...baseAllowedKeys, 'ciFix'].sort() for (const candidate of result.candidates) { if (candidate.trigger !== 'review-heavy') continue const keys = Object.keys(candidate).sort() - expect(keys).toEqual(['mergeSha', 'reviewExcerpts', 'reviewRounds', 'signals', 'trigger'].sort()) + expect([baseAllowedKeys, allowedKeysWithCiFix]).toContainEqual(keys) // Explicitly assert forbidden keys are absent expect(candidate).not.toHaveProperty('owner') expect(candidate).not.toHaveProperty('repo') @@ -618,7 +622,7 @@ describe('buildCandidateDigest — CiFixCandidate union support', () => { expect(result.telemetry.afterSeenDedup).toBe(0) }) - it('within-run dedup (R4): same SHA from both triggers yields one candidate, review-heavy wins', () => { + it('within-run dedup: same SHA from both triggers yields one candidate, review-heavy wins WITH ci-fix attached', () => { // #given a review-heavy and a ci-fix candidate with the SAME mergeSha, NOT yet proposed // (a PR that matched both triggers in this run) const sha = 'bothtrg1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' @@ -630,9 +634,450 @@ describe('buildCandidateDigest — CiFixCandidate union support', () => { // #when building the digest const result = buildCandidateDigest(input) - // #then exactly one candidate is emitted (R4), and it is the review-heavy one + // #then exactly one candidate is emitted, and it is the review-heavy one expect(result.candidates).toHaveLength(1) expect(result.candidates[0]?.trigger).toBe('review-heavy') + // #then the ci-fix evidence is ATTACHED to the surviving review candidate + // (not dropped — the dual-trigger PR is the richest learning source) + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix).toBeDefined() + expect(result.candidates[0].ciFix?.failingCheckName).toBe(ciFixCandidate.failingCheckName) + expect(result.candidates[0].ciFix?.diffExcerpt).toBe(ciFixCandidate.diffExcerpt) + } + }) +}) + +// --------------------------------------------------------------------------- +// CHARACTERIZATION: pure-review and pure-ci-fix single-trigger behavior is unchanged +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — CHARACTERIZATION: single-trigger behavior unchanged', () => { + it('CHARACTERIZATION: pure-review PR (distinct SHA) still produces a ReviewCandidate with no ciFix', () => { + // #given a pure review-heavy candidate with a unique SHA (no ci-fix counterpart) + // This locks the current behavior: pure-review PRs are unaffected by the dual-trigger attach. + const sha = 'purereview1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const reviewCandidate = makeCandidate({ + mergeSha: sha, + reviewRounds: 3, + reviewExcerpts: ['Fix the null check.'], + }) + const input = makeDigestInput({mergedPrs: [reviewCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is emitted + expect(result.candidates).toHaveLength(1) + // #then it is a ReviewCandidate with the expected fields + expect(result.candidates[0]?.trigger).toBe('review-heavy') + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].mergeSha).toBe(sha) + expect(result.candidates[0].reviewRounds).toBe(3) + expect(result.candidates[0].reviewExcerpts).toEqual(['Fix the null check.']) + // #then NO ciFix is attached (pure review — no ci-fix counterpart) + expect(result.candidates[0].ciFix).toBeUndefined() + } + }) + + it('CHARACTERIZATION: pure-ci-fix PR (distinct SHA) still produces a CiFixCandidate unchanged', () => { + // #given a pure ci-fix candidate with a unique SHA (no review counterpart) + // This locks the current behavior: pure-ci-fix PRs are unaffected by the dual-trigger attach. + const sha = 'purefix001aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const ciFixCandidate = makeCiFixCandidate({ + mergeSha: sha, + failingCheckName: 'CI / lint', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }) + const input = makeDigestInput({mergedPrs: [ciFixCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is emitted + expect(result.candidates).toHaveLength(1) + // #then it is a CiFixCandidate with the expected fields + expect(result.candidates[0]?.trigger).toBe('ci-fail-then-pass') + if (result.candidates[0]?.trigger === 'ci-fail-then-pass') { + expect(result.candidates[0].mergeSha).toBe(sha) + expect(result.candidates[0].failingCheckName).toBe('CI / lint') + expect(result.candidates[0].diffExcerpt).toContain('-bad') + } + }) + + it('CHARACTERIZATION: selectWithCiFixFloor still reserves a slot for a pure-ci-fix candidate', () => { + // #given the production starvation scenario with a pure ci-fix candidate + // This locks the floor behavior for pure-ci-fix candidates (unchanged by this unit). + const sha = (n: number): string => `${n}`.padStart(40, '0') + const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const pureCiFix = makeCiFixCandidate({mergeSha: sha(99)}) + const ordered = [...reviewHeavy, pureCiFix] + + // #when selecting with cap=5, floor=1 + const selected = selectWithCiFixFloor(ordered, 5, 1) + + // #then the pure ci-fix candidate is reserved (unchanged behavior) + expect(selected).toHaveLength(5) + expect(selected).toContain(pureCiFix) + expect(selected.filter(c => c.trigger === 'review-heavy')).toHaveLength(4) + }) +}) + +// --------------------------------------------------------------------------- +// hasCiFixEvidence helper +// --------------------------------------------------------------------------- + +describe('hasCiFixEvidence', () => { + it('returns true for a pure CiFixCandidate', () => { + // #given a pure ci-fix candidate + const candidate = makeCiFixCandidate() + // #then hasCiFixEvidence is true + expect(hasCiFixEvidence(candidate)).toBe(true) + }) + + it('returns true for a ReviewCandidate with a non-empty ciFix.diffExcerpt', () => { + // #given a review candidate with attached ci-fix evidence (non-empty diff) + const candidate: ReviewCandidate = { + ...makeCandidate(), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }, + } + // #then hasCiFixEvidence is true + expect(hasCiFixEvidence(candidate)).toBe(true) + }) + + it('returns false for a pure ReviewCandidate (no ciFix)', () => { + // #given a pure review candidate with no ciFix field + const candidate = makeCandidate() + // #then hasCiFixEvidence is false + expect(hasCiFixEvidence(candidate)).toBe(false) + }) + + it('returns false for a ReviewCandidate with an empty ciFix.diffExcerpt', () => { + // #given a review candidate with a ciFix field but empty diffExcerpt + // (evidence was cleared, e.g. by the privacy scan) + const candidate: ReviewCandidate = { + ...makeCandidate(), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '', // empty — not substantive + }, + } + // #then hasCiFixEvidence is false (empty diff = no substantive evidence) + expect(hasCiFixEvidence(candidate)).toBe(false) + }) + + it('returns true for a ReviewCandidate with ciFix that has a logExcerpt but non-empty diff', () => { + // #given a review candidate with attached ci-fix evidence including a log excerpt + const candidate: ReviewCandidate = { + ...makeCandidate(), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + logExcerpt: 'Error: test failed at line 42', + }, + } + // #then hasCiFixEvidence is true (non-empty diff is the key signal) + expect(hasCiFixEvidence(candidate)).toBe(true) + }) +}) + +// --------------------------------------------------------------------------- +// dual-trigger attach — the live-bug regression +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — dual-trigger attach regression', () => { + it('REGRESSION: same-SHA review+ci-fix pair → ONE ReviewCandidate with ciFix attached', () => { + // #given a review-heavy and a ci-fix candidate with the SAME mergeSha + // This is the live bug: before this fix, the ci-fix evidence was dropped. + const sha = 'dualtrg1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const ciFixCandidate = makeCiFixCandidate({ + mergeSha: sha, + failingCheckName: 'CI / test', + diffExcerpt: '--- a/scripts/foo.ts\n+++ b/scripts/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }) + const reviewCandidate = makeCandidate({ + mergeSha: sha, + reviewRounds: 3, + reviewExcerpts: ['Fix the null check.'], + }) + // ci-fix listed first to prove precedence is by trigger, not array order + const input = makeDigestInput({mergedPrs: [ciFixCandidate, reviewCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly ONE candidate is emitted (one proposal per SHA) + expect(result.candidates).toHaveLength(1) + + // #then the survivor is a ReviewCandidate (review prose is the richer base) + expect(result.candidates[0]?.trigger).toBe('review-heavy') + + // #then the ci-fix evidence IS ATTACHED (not dropped — the regression fix) + if (result.candidates[0]?.trigger === 'review-heavy') { + const emitted = result.candidates[0] + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.failingCheckName).toBe('CI / test') + expect(emitted.ciFix?.diffExcerpt).toContain('-bad') + // #then the review evidence is also preserved + expect(emitted.reviewRounds).toBe(3) + expect(emitted.reviewExcerpts).toEqual(['Fix the null check.']) + } + }) + + it('telemetry: a dual-trigger candidate increments dualTriggerCandidates; pure candidates do not', () => { + // #given one dual-trigger (review+ci-fix same SHA) and one pure-review candidate + const dualSha = 'mergedcc1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const input = makeDigestInput({ + mergedPrs: [ + makeCiFixCandidate({mergeSha: dualSha, failingCheckName: 'CI / test', diffExcerpt: '-bad\n+good'}), + makeCandidate({mergeSha: dualSha, reviewExcerpts: ['Fix it.']}), + makeCandidate({mergeSha: 'purerev1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', reviewExcerpts: ['Other.']}), + ], + }) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is counted as dual-trigger + expect(result.telemetry.dualTriggerCandidates).toBe(1) + }) + + it('dual-trigger: review listed first, ci-fix listed second → same attach result', () => { + // #given review-heavy listed BEFORE ci-fix (opposite order from the regression test) + const sha = 'dualtrg2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const reviewCandidate = makeCandidate({mergeSha: sha, reviewRounds: 2}) + const ciFixCandidate = makeCiFixCandidate({ + mergeSha: sha, + failingCheckName: 'CI / lint', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-x\n+y', + }) + const input = makeDigestInput({mergedPrs: [reviewCandidate, ciFixCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then one ReviewCandidate with ciFix attached regardless of input order + expect(result.candidates).toHaveLength(1) + expect(result.candidates[0]?.trigger).toBe('review-heavy') + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix).toBeDefined() + expect(result.candidates[0].ciFix?.failingCheckName).toBe('CI / lint') + } + }) + + it('dual-trigger: logExcerpt is attached when present on the ci-fix candidate', () => { + // #given a ci-fix candidate with a logExcerpt + const sha = 'dualtrg3aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const ciFixCandidate = makeCiFixCandidate({ + mergeSha: sha, + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + logExcerpt: 'Error: test failed at line 42', + }) + const reviewCandidate = makeCandidate({mergeSha: sha}) + const input = makeDigestInput({mergedPrs: [ciFixCandidate, reviewCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the logExcerpt is also attached + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix?.logExcerpt).toBe('Error: test failed at line 42') + } + }) + + it('dual-trigger: logExcerpt is absent when not present on the ci-fix candidate', () => { + // #given a ci-fix candidate WITHOUT a logExcerpt + const sha = 'dualtrg4aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const ciFixCandidate = makeCiFixCandidate({mergeSha: sha}) // no logExcerpt in fixture + const reviewCandidate = makeCandidate({mergeSha: sha}) + const input = makeDigestInput({mergedPrs: [ciFixCandidate, reviewCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then logExcerpt is absent (not set to undefined explicitly — just not present) + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix?.logExcerpt).toBeUndefined() + } + }) +}) + +// --------------------------------------------------------------------------- +// floor-fix — dual-trigger candidate is reserved by selectWithCiFixFloor +// --------------------------------------------------------------------------- + +describe('selectWithCiFixFloor — floor-fix: dual-trigger candidate is reserved', () => { + const sha = (n: number): string => `${n}`.padStart(40, '0') + + it('FLOOR FIX: a dual-trigger ReviewCandidate (with ciFix) is reserved by the floor even when review-heavy candidates fill the rest', () => { + // #given the production starvation scenario, now with a dual-trigger candidate + // (review-heavy + attached ci-fix) instead of a pure ci-fix candidate. + // Before this fix, the dual-trigger candidate would NOT be reserved (trigger !== 'ci-fail-then-pass'). + const dualTriggerCandidate: ReviewCandidate = { + ...makeCandidate({mergeSha: sha(99)}), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }, + } + const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const ordered = [...reviewHeavy, dualTriggerCandidate] // dual-trigger sorts LAST + + // #when selecting with cap=5, floor=1 + const selected = selectWithCiFixFloor(ordered, 5, 1) + + // #then the dual-trigger candidate IS included (hasCiFixEvidence is true for it) + expect(selected).toHaveLength(5) + expect(selected).toContain(dualTriggerCandidate) + // #then 4 review-heavy candidates fill the rest (pure review, no ciFix) + const pureReviewSelected = selected.filter( + c => c.trigger === 'review-heavy' && !('ciFix' in c && c.ciFix !== undefined), + ) + expect(pureReviewSelected).toHaveLength(4) + }) + + it('FLOOR FIX: a ReviewCandidate with empty ciFix.diffExcerpt is NOT reserved (evidence cleared)', () => { + // #given a review candidate with a ciFix field but empty diffExcerpt (evidence cleared) + const clearedCiFix: ReviewCandidate = { + ...makeCandidate({mergeSha: sha(99)}), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '', // cleared — not substantive + }, + } + const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const ordered = [...reviewHeavy, clearedCiFix] + + // #when selecting with cap=5, floor=1 + const selected = selectWithCiFixFloor(ordered, 5, 1) + + // #then the cleared-ciFix candidate is NOT reserved (hasCiFixEvidence is false) + // It may or may not be included depending on position, but NOT via the floor reservation + // With 8 review-heavy + 1 cleared-ciFix, cap=5 → only the first 5 review-heavy are taken + expect(selected).not.toContain(clearedCiFix) + }) + + it('FLOOR FIX: buildCandidateDigest end-to-end — dual-trigger candidate is reserved by the floor', () => { + // #given the production starvation scenario end-to-end: + // - A same-SHA review+ci-fix pair (will be collapsed to a dual-trigger ReviewCandidate) + // - Enough pure review-heavy candidates to fill the cap without the floor + const dualSha = 'dualfloor1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const ciFixForDual = makeCiFixCandidate({ + mergeSha: dualSha, + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }) + const reviewForDual = makeCandidate({mergeSha: dualSha, reviewRounds: 2}) + + // 8 pure review-heavy candidates that would fill the cap without the floor + const pureReview = Array.from({length: 8}, (_, i) => + makeCandidate({mergeSha: `purereview${i}${'0'.repeat(30 - String(i).length)}`}), + ) + + // ci-fix listed first, then review, then pure review (ci-fix sorts last in the list) + const input = makeDigestInput({ + mergedPrs: [...pureReview, ciFixForDual, reviewForDual], + maxLearnings: 5, + }) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly 5 candidates are emitted (cap) + expect(result.candidates).toHaveLength(5) + + // #then the dual-trigger candidate IS in the output (floor reserved it) + const dualCandidate = result.candidates.find(c => c.mergeSha === dualSha) + expect(dualCandidate).toBeDefined() + expect(dualCandidate?.trigger).toBe('review-heavy') + if (dualCandidate?.trigger === 'review-heavy') { + // #then the ci-fix evidence is attached + expect(dualCandidate.ciFix).toBeDefined() + expect(dualCandidate.ciFix?.failingCheckName).toBe('CI / test') + } + }) +}) + +// --------------------------------------------------------------------------- +// dual-trigger edge cases — same-trigger duplicates and freshness order +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — dual-trigger edge cases', () => { + it('EDGE: two review-heavy records with same SHA → first-seen wins (existing behavior unchanged)', () => { + // #given two review-heavy candidates with the SAME SHA + const sha = 'tworev001aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const review1 = makeCandidate({mergeSha: sha, reviewRounds: 2}) + const review2 = makeCandidate({mergeSha: sha, reviewRounds: 5}) // different rounds + const input = makeDigestInput({mergedPrs: [review1, review2]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is emitted (first-seen wins for same-trigger) + expect(result.candidates).toHaveLength(1) + expect(result.candidates[0]?.trigger).toBe('review-heavy') + // #then no ciFix is attached (no ci-fix counterpart) + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix).toBeUndefined() + } + }) + + it('EDGE: two ci-fix records with same SHA → first-seen wins (existing behavior unchanged)', () => { + // #given two ci-fix candidates with the SAME SHA + const sha = 'twocifix1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const cifix1 = makeCiFixCandidate({mergeSha: sha, failingCheckName: 'CI / test'}) + const cifix2 = makeCiFixCandidate({mergeSha: sha, failingCheckName: 'CI / lint'}) + const input = makeDigestInput({mergedPrs: [cifix1, cifix2]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is emitted (first-seen wins for same-trigger) + expect(result.candidates).toHaveLength(1) + expect(result.candidates[0]?.trigger).toBe('ci-fail-then-pass') + // #then the first ci-fix candidate's check name is used + if (result.candidates[0]?.trigger === 'ci-fail-then-pass') { + expect(result.candidates[0].failingCheckName).toBe('CI / test') + } + }) + + it('ORDER: dual-trigger candidate keeps its review candidate position in the output', () => { + // #given a mix of candidates where the dual-trigger review candidate is at position N + // The review candidate's position in the output should be preserved (freshness order). + const sha1 = 'order001aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const sha2 = 'order002aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' // dual-trigger SHA + const sha3 = 'order003aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + + const review1 = makeCandidate({mergeSha: sha1}) + const reviewForDual = makeCandidate({mergeSha: sha2, reviewRounds: 3}) + const ciFixForDual = makeCiFixCandidate({mergeSha: sha2}) + const review3 = makeCandidate({mergeSha: sha3}) + + // Input order: review1, reviewForDual, ciFixForDual, review3 + // The dual-trigger review candidate is at position 1 (0-indexed) + const input = makeDigestInput({ + mergedPrs: [review1, reviewForDual, ciFixForDual, review3], + maxLearnings: 10, + }) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then 3 candidates are emitted (review1, dual-trigger, review3) + expect(result.candidates).toHaveLength(3) + + // #then the dual-trigger candidate is at position 1 (its original review position) + expect(result.candidates[1]?.mergeSha).toBe(sha2) + expect(result.candidates[1]?.trigger).toBe('review-heavy') + if (result.candidates[1]?.trigger === 'review-heavy') { + expect(result.candidates[1].ciFix).toBeDefined() + } + + // #then the overall order is preserved: sha1, sha2 (dual), sha3 + expect(result.candidates.map(c => c.mergeSha)).toEqual([sha1, sha2, sha3]) }) }) @@ -1500,6 +1945,7 @@ describe('harvest→open schema contract', () => { afterSeenDedup: 3, afterSolutionsDedup: 2, emitted: 2, + dualTriggerCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, }, @@ -3079,6 +3525,7 @@ describe('main() wiring: both candidate sources concatenated via buildCandidateD afterSeenDedup: 0, afterSolutionsDedup: 0, emitted: 0, + dualTriggerCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, }, @@ -3368,3 +3815,437 @@ describe('buildCandidateDigest — CiFix logExcerpt privacy scan', () => { expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) }) }) + +// --------------------------------------------------------------------------- +// Attached ciFix evidence scanned independently in ReviewCandidate +// --------------------------------------------------------------------------- + +// Helper: make a dual candidate (ReviewCandidate with ciFix attached) +function makeDualCandidate( + reviewExcerpts: string[], + ciFix: {failingCheckName: string; diffExcerpt: string; logExcerpt?: string}, + sha = 'dual0001aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', +): ReviewCandidate { + return { + ...makeCandidate({mergeSha: sha, reviewExcerpts}), + ciFix, + } +} + +describe('buildCandidateDigest — attached ciFix privacy scan (independent from reviewExcerpts)', () => { + it('HAPPY: dual candidate with clean review prose + clean diff → both survive, redacted', () => { + // #given a dual candidate with clean review prose and clean ci-fix diff + const candidate = makeDualCandidate(['Please fix the null check here.'], { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + logExcerpt: 'Error: test failed at line 42', + }) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts survive + expect(emitted.reviewExcerpts).toEqual(['Please fix the null check here.']) + // #then ciFix survives with its evidence + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.diffExcerpt).toBe('-bad\n+good') + expect(emitted.ciFix?.logExcerpt).toBe('Error: test failed at line 42') + // #then no counters incremented + expect(result.telemetry.enrichmentBlocked).toBe(0) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) + }) + + it('INDEPENDENT DROP (diff hit): secret in ciFix.diffExcerpt → ciFix cleared, reviewExcerpts SURVIVE', () => { + // #given a dual candidate with a secret in the attached diff but clean review prose + const secretDiff = '-old\n+token=ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA4' + const candidate = makeDualCandidate( + ['Please fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: secretDiff, + }, + 'dual0002aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts SURVIVE (clean — independent scan) + expect(emitted.reviewExcerpts).toEqual(['Please fix the null check here.']) + // #then ciFix is CLEARED (secret in diff) + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.diffExcerpt).toBe('') + // #then enrichmentBlockedBySecret is incremented (for the ciFix drop) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(1) + // #then the secret does NOT appear in the serialized digest + expect(JSON.stringify(result)).not.toContain('ghp_') + }) + + it('INDEPENDENT DROP (review hit): private name in reviewExcerpts → reviewExcerpts cleared, clean ciFix SURVIVES', () => { + // #given a dual candidate with a private name in review prose but clean ci-fix diff + const privateTokens = buildPrivateTokenSet(['testowner/secret-repo']) + const candidate = makeDualCandidate( + ['See testowner/secret-repo#42 for context.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + }, + 'dual0003aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts are CLEARED (private name hit) + expect(emitted.reviewExcerpts).toEqual([]) + // #then ciFix SURVIVES (clean — independent scan) + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.diffExcerpt).toBe('-bad\n+good') + // #then enrichmentBlocked is incremented (for the review drop) + expect(result.telemetry.enrichmentBlocked).toBe(1) + // #then enrichmentBlockedBySecret is NOT incremented + expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) + // #then private name does NOT appear in serialized digest + expect(JSON.stringify(result)).not.toContain('testowner/secret-repo') + }) + + it('CROSS-FIELD MUTATION PROOF: private name in BOTH reviewExcerpts AND ciFix.diffExcerpt → caught in BOTH', () => { + // #given a dual candidate with the SAME private name in BOTH review prose AND the attached diff + // This is the load-bearing mutation proof: if the attached-ciFix scan is removed, + // the private name in the diff reaches the serialized digest → this test FAILS. + const privateTokens = buildPrivateTokenSet(['testowner/secret-repo']) + const candidate = makeDualCandidate( + ['See testowner/secret-repo#42 for context.'], // private name in review prose + { + failingCheckName: 'CI / test', + diffExcerpt: '-old testowner/secret-repo config\n+new config', // private name in diff + }, + 'dual0004aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts are CLEARED (private name in review prose) + expect(emitted.reviewExcerpts).toEqual([]) + // #then ciFix is CLEARED (private name in diff — caught by the INDEPENDENT scan) + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.diffExcerpt).toBe('') + // #then enrichmentBlocked is incremented TWICE (once for review, once for ciFix) + expect(result.telemetry.enrichmentBlocked).toBe(2) + // #then the private name does NOT appear ANYWHERE in the serialized digest + const serialized = JSON.stringify(result) + expect(serialized).not.toContain('testowner/secret-repo') + expect(serialized).not.toContain('testowner--secret-repo') + + // MUTATION PROOF: if we remove the ciFix scan (pass empty privateTokens for the ciFix scan), + // the diff's private name would reach the digest. We prove this by showing that a candidate + // with the private name ONLY in the diff (not in reviewExcerpts) IS caught by the ciFix scan. + const candidateOnlyDiffHit = makeDualCandidate( + ['Clean review prose — no private names here.'], // clean review + { + failingCheckName: 'CI / test', + diffExcerpt: '-old testowner/secret-repo config\n+new config', // private name ONLY in diff + }, + 'dual0004baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', + ) + const resultOnlyDiff = buildCandidateDigest(makeDigestInput({mergedPrs: [candidateOnlyDiffHit], privateTokens})) + const emittedOnlyDiff = asReviewCandidate(resultOnlyDiff.candidates[0]) + // #then reviewExcerpts SURVIVE (clean) + expect(emittedOnlyDiff.reviewExcerpts).toEqual(['Clean review prose — no private names here.']) + // #then ciFix is CLEARED (private name in diff — the ciFix scan is load-bearing) + expect(emittedOnlyDiff.ciFix?.diffExcerpt).toBe('') + // #then the private name does NOT appear in the serialized digest + expect(JSON.stringify(resultOnlyDiff)).not.toContain('testowner/secret-repo') + // If the ciFix scan were removed, the diff's private name would appear here → test fails + }) + + it('SCAN-UNAVAILABLE: applyEnrichmentScanAvailability(false) clears reviewExcerpts AND attached ciFix', () => { + // #given a dual candidate with both review prose and ci-fix evidence + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + logExcerpt: 'Error: test failed', + }, + 'dual0005aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + + // #when scan is unavailable (token load failed) + const result = applyEnrichmentScanAvailability([candidate], false) + + // #then reviewExcerpts are cleared + const r0 = result[0] + if (r0?.trigger === 'review-heavy') { + expect(r0.reviewExcerpts).toEqual([]) + // #then ciFix evidence is ALSO cleared (no unscanned evidence reaches the digest) + expect(r0.ciFix).toBeDefined() + expect(r0.ciFix?.diffExcerpt).toBe('') + expect(r0.ciFix?.logExcerpt).toBeUndefined() + } + }) + + it('SCAN-UNAVAILABLE: applyEnrichmentScanAvailability(true) leaves dual candidate unchanged', () => { + // #given a dual candidate with both review prose and ci-fix evidence + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + logExcerpt: 'Error: test failed', + }, + 'dual0006aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + + // #when scan is available + const result = applyEnrichmentScanAvailability([candidate], true) + + // #then candidates are returned unchanged (same reference) + expect(result).toBe(result) // same array + const r0 = result[0] + if (r0?.trigger === 'review-heavy') { + expect(r0.reviewExcerpts).toEqual(['Fix the null check here.']) + expect(r0.ciFix?.diffExcerpt).toBe('-bad\n+good') + expect(r0.ciFix?.logExcerpt).toBe('Error: test failed') + } + }) + + it('ALLOWLIST/OPACITY: emitted dual candidate carries only allowlisted keys — no owner/repo/number/title', () => { + // #given a dual candidate + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + }, + 'dual0007aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the emitted candidate has only allowlisted keys + expect(result.candidates).toHaveLength(1) + const emitted = result.candidates[0] + expect(emitted).not.toHaveProperty('owner') + expect(emitted).not.toHaveProperty('repo') + expect(emitted).not.toHaveProperty('number') + expect(emitted).not.toHaveProperty('title') + // #then ciFix is present with only allowlisted sub-fields + if (emitted?.trigger === 'review-heavy' && emitted.ciFix !== undefined) { + const ciFixKeys = Object.keys(emitted.ciFix).sort() + // ciFix may have failingCheckName, diffExcerpt, and optionally logExcerpt + const allowedCiFixKeys = ['diffExcerpt', 'failingCheckName'].sort() + const allowedCiFixKeysWithLog = [...allowedCiFixKeys, 'logExcerpt'].sort() + expect([allowedCiFixKeys, allowedCiFixKeysWithLog]).toContainEqual(ciFixKeys) + } + }) + + it('REDACT (not block): /Users/ path in ciFix.diffExcerpt → redacted to [REDACTED], ciFix still emitted', () => { + // #given a dual candidate with a file path in the attached diff (redact-class, not block-class) + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: 'Config loaded from /Users/marcus/.ssh/config for the build.', + }, + 'dual0008aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted (not blocked) + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then ciFix is still present (not cleared) + expect(emitted.ciFix).toBeDefined() + // #then the path is redacted in the diff + expect(emitted.ciFix?.diffExcerpt).not.toContain('/Users/marcus') + expect(emitted.ciFix?.diffExcerpt).toContain('[REDACTED]') + // #then neither counter is incremented (redaction, not blocking) + expect(result.telemetry.enrichmentBlocked).toBe(0) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) + }) + + it('REDACT (not block): *.fro.bot hostname in ciFix.diffExcerpt → redacted, ciFix still emitted', () => { + // #given a dual candidate with an internal hostname in the attached diff + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: 'Connecting to api.fro.bot for the check.', + }, + 'dual0009aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted (not blocked) + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then ciFix is still present (not cleared) + expect(emitted.ciFix).toBeDefined() + // #then the hostname is redacted in the diff + expect(emitted.ciFix?.diffExcerpt).not.toContain('api.fro.bot') + expect(emitted.ciFix?.diffExcerpt).toContain('[REDACTED]') + // #then neither counter is incremented + expect(result.telemetry.enrichmentBlocked).toBe(0) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) + }) + + it('dual candidate with no ciFix → reviewExcerpts scan unchanged (pure review path unaffected)', () => { + // #given a pure review candidate (no ciFix) with clean prose + const candidate = makeCandidate({ + mergeSha: 'dual0010aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + reviewExcerpts: ['Fix the null check here.'], + }) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted with reviewExcerpts intact + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + expect(emitted.reviewExcerpts).toEqual(['Fix the null check here.']) + expect(emitted.ciFix).toBeUndefined() + expect(result.telemetry.enrichmentBlocked).toBe(0) + }) + + it('ciFix with secret in logExcerpt (not diffExcerpt) → ciFix cleared, reviewExcerpts survive', () => { + // #given a dual candidate with a secret in the attached logExcerpt but clean diff and review prose + const secretLog = 'Error: auth failed with token ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA5' + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + logExcerpt: secretLog, + }, + 'dual0011aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts SURVIVE (clean — independent scan) + expect(emitted.reviewExcerpts).toEqual(['Fix the null check here.']) + // #then ciFix is CLEARED (secret in logExcerpt) + expect(emitted.ciFix?.diffExcerpt).toBe('') + expect(emitted.ciFix?.logExcerpt).toBeUndefined() + // #then enrichmentBlockedBySecret is incremented + expect(result.telemetry.enrichmentBlockedBySecret).toBe(1) + // #then the secret does NOT appear in the serialized digest + expect(JSON.stringify(result)).not.toContain('ghp_') + }) +}) + +// --------------------------------------------------------------------------- +// counts-before-scan invariant — dualTriggerCandidates counted pre-scan +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — dualTriggerCandidates counted before privacy scan', () => { + it('dual-trigger candidate whose ciFix is cleared by privacy scan still increments dualTriggerCandidates', () => { + // #given a dual-trigger candidate whose attached ciFix contains a private name + // (so the privacy scan will CLEAR the ciFix evidence) + const privateTokens = buildPrivateTokenSet(['testowner/secret-repo']) + const sha = 'dual0012aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-old testowner/secret-repo config\n+new config', // private name → will be cleared + }, + sha, + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + + // #then ciFix is CLEARED (private name in diff) + expect(emitted.ciFix?.diffExcerpt).toBe('') + + // #then dualTriggerCandidates is STILL 1 — counted on the pre-scan capped set, + // not on the post-scan output. The candidate had ciFix attached before the scan. + expect(result.telemetry.dualTriggerCandidates).toBe(1) + }) +}) + +// --------------------------------------------------------------------------- +// mixed threat types — private NAME in reviewExcerpts, ghp_ SECRET in ciFix +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — mixed threat types in dual-trigger candidate', () => { + it('private NAME in reviewExcerpts AND ghp_ SECRET in ciFix.diffExcerpt → both counters increment independently', () => { + // #given a dual-trigger candidate where: + // - reviewExcerpts contains a private repo name (enrichmentBlocked counter) + // - ciFix.diffExcerpt contains a GitHub PAT (enrichmentBlockedBySecret counter) + // This proves both counter classes increment correctly when each field has a different threat. + const privateTokens = buildPrivateTokenSet(['testowner/secret-repo']) + const sha = 'dual0013aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const candidate = makeDualCandidate( + ['See testowner/secret-repo#42 for context.'], // private NAME → enrichmentBlocked + { + failingCheckName: 'CI / test', + diffExcerpt: '-old\n+token=ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA6', // ghp_ SECRET → enrichmentBlockedBySecret + }, + sha, + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + + // #then reviewExcerpts are CLEARED (private name hit → enrichmentBlocked) + expect(emitted.reviewExcerpts).toEqual([]) + + // #then ciFix is CLEARED (ghp_ secret hit → enrichmentBlockedBySecret) + expect(emitted.ciFix?.diffExcerpt).toBe('') + + // #then enrichmentBlocked is 1 (private name in reviewExcerpts) + expect(result.telemetry.enrichmentBlocked).toBe(1) + + // #then enrichmentBlockedBySecret is 1 (ghp_ secret in ciFix.diffExcerpt) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(1) + + // #then neither secret appears in the serialized digest + const serialized = JSON.stringify(result) + expect(serialized).not.toContain('testowner/secret-repo') + expect(serialized).not.toContain('ghp_') + }) +}) diff --git a/scripts/capture-learnings-harvest.ts b/scripts/capture-learnings-harvest.ts index b49b9794f..d49d99151 100644 --- a/scripts/capture-learnings-harvest.ts +++ b/scripts/capture-learnings-harvest.ts @@ -133,10 +133,32 @@ export interface CandidateSignals { * * Design: top-level discriminated fields per variant (not a nested `evidence` block). * This keeps the diff minimal, preserves the existing ReviewCandidate shape exactly - * (only adding `trigger`), and avoids over-engineering before Unit 3 adds CiFix evidence. + * (only adding `trigger`), and avoids over-engineering. */ export type Candidate = ReviewCandidate | CiFixCandidate +/** + * Attached ci-fix evidence on a ReviewCandidate that also matched the ci-fail-then-pass + * trigger. Reuses the same field names as CiFixCandidate's evidence fields for consistency. + * Present only when the same PR matched both triggers (within-run dedup attaches it). + * A non-empty `diffExcerpt` means the evidence is substantive; an empty diffExcerpt means + * the evidence was cleared (e.g. by the privacy scan). + */ +export interface CiFixEvidence { + /** Name of the check that transitioned failed → passed. */ + failingCheckName: string + /** + * Diff excerpt between lastFailingSha and firstPassingSha, truncated to + * MAX_EXCERPT_CHARS_PER_CANDIDATE. Empty string when evidence was cleared. + */ + diffExcerpt: string + /** + * Best-effort excerpt from the failing job log. + * '[failure log purged or unavailable]' when the log could not be fetched. + */ + logExcerpt?: string +} + /** * A candidate sourced from a PR with multiple Fro Bot review rounds. * @@ -145,6 +167,10 @@ export type Candidate = ReviewCandidate | CiFixCandidate * bounded to MAX_EXCERPT_CHARS_PER_CANDIDATE. Empty when enrichment was blocked by the * upstream privacy scan or when no prose was available. Array form gives the agent clearer * structure than a single concatenated string. + * + * `ciFix` is present when the same PR also matched the ci-fail-then-pass trigger. The + * within-run dedup attaches the ci-fix evidence to the surviving ReviewCandidate instead + * of dropping it, so one proposal carries both signals. */ export interface ReviewCandidate { trigger: 'review-heavy' @@ -164,6 +190,13 @@ export interface ReviewCandidate { * the enriched content or when no non-empty prose was available. */ reviewExcerpts: string[] + /** + * Attached ci-fix evidence when this PR also matched the ci-fail-then-pass trigger. + * Populated by within-run dedup when a same-SHA dual-trigger pair is collapsed. + * Absent for pure review-heavy PRs. A non-empty diffExcerpt means the evidence is + * substantive; hasCiFixEvidence() checks this boundary precisely. + */ + ciFix?: CiFixEvidence } /** @@ -225,15 +258,24 @@ export interface DigestTelemetry { afterSolutionsDedup: number emitted: number /** - * Number of candidates whose enriched content was dropped by the private-name scan. + * Number of dual-trigger candidates: a review-heavy candidate that also carried attached + * ci-fix evidence (the PR drew review AND failed then fixed CI). Counts-only. + */ + dualTriggerCandidates: number + /** + * Number of evidence fields dropped by the private-name scan. * The candidate itself is kept (title-only); only the enriched evidence is cleared. + * A single dual-trigger candidate can contribute up to 2 (one per evidence type: + * reviewExcerpts and attached ciFix are scanned independently). * Counts-only — no private names logged. */ enrichmentBlocked: number /** - * Number of candidates whose enriched content was dropped because it contained a - * hard-secret shape (PAT, private key, credential-bearing connection string, etc.) - * detected by `logDiffHasSecret` after `redactLogDiffSecrets` was applied. + * Number of evidence fields dropped because they contained a hard-secret shape + * (PAT, private key, credential-bearing connection string, etc.) detected by + * `logDiffHasSecret` after `redactLogDiffSecrets` was applied. + * A single dual-trigger candidate can contribute up to 2 (one per evidence type: + * reviewExcerpts and attached ciFix are scanned independently). * Counts-only — no secret values logged. */ enrichmentBlockedBySecret: number @@ -305,8 +347,8 @@ export function parseMergeShaMarker(body: string): string | null { * Build the opaque candidate digest from injected inputs. * * Steps: - * 0. Within-run dedup: collapse to one candidate per mergeSha (a PR matching both triggers - * appears once per source); review-heavy takes precedence (R4). + * 0. Within-run dedup: collapse to one candidate per mergeSha. A PR matching both triggers + * keeps its review-heavy candidate with the ci-fix evidence attached (one proposal per SHA). * 1. Drop candidates whose mergeSha is already in openedLearningShas (seen-set dedup). * 2. Drop candidates whose signals strongly overlap an existing solutions doc (solutions dedup). * 3. Cap to maxLearnings. @@ -318,6 +360,26 @@ export function parseMergeShaMarker(body: string): string | null { * * No I/O. Fully unit-testable. The private token set is injected so the scan is pure. */ +/** + * Returns true when a candidate carries ci-fix evidence that should count toward the + * `selectWithCiFixFloor` reservation. + * + * A candidate "carries" ci-fix evidence when: + * - It is a pure CiFixCandidate (trigger === 'ci-fail-then-pass'), OR + * - It is a ReviewCandidate with a `ciFix` field whose `diffExcerpt` is non-empty. + * + * An empty-but-present `ciFix` (diffExcerpt === '') does NOT count — that means the + * evidence was cleared (e.g. by the privacy scan) and there is nothing substantive to + * reserve a slot for. + */ +export function hasCiFixEvidence(candidate: Candidate): boolean { + if (candidate.trigger === 'ci-fail-then-pass') return true + if (candidate.trigger === 'review-heavy' && candidate.ciFix !== undefined) { + return candidate.ciFix.diffExcerpt !== '' + } + return false +} + /** * Select up to `cap` candidates from `ordered` (freshness-ordered), guaranteeing that at least * `floor` ci-fail-then-pass candidates are included when that many exist. @@ -338,7 +400,7 @@ export function selectWithCiFixFloor(ordered: Candidate[], cap: number, floor: n if (cap <= 0) return [] if (floor <= 0) return ordered.slice(0, cap) - const ciFix = ordered.filter(c => c.trigger === 'ci-fail-then-pass') + const ciFix = ordered.filter(c => hasCiFixEvidence(c)) if (ciFix.length === 0) return ordered.slice(0, cap) // Reserve the freshest ci-fix candidates (bounded by floor and cap), then fill the rest of @@ -353,19 +415,97 @@ export function selectWithCiFixFloor(ordered: Candidate[], cap: number, floor: n return ordered.filter(c => chosen.has(c)).slice(0, cap) } +/** + * Shared pure helper: redact and scan ci-fix evidence fields. + * + * Redacts structural secrets from diffExcerpt, logExcerpt, and failingCheckName via + * redactLogDiffSecrets, then checks the combined text for private-name leaks and + * residual hard secrets. Returns the redacted values and boolean scan results so + * callers can apply their own clear-or-emit logic. + * + * Privacy-ordering invariant: inputs must already be truncated to budget before this + * call. Never move truncation after the scan. + * + * Pure function: no I/O, no side effects, no counter mutations. + */ +function scanCiFixEvidence( + diffExcerpt: string, + logExcerpt: string | undefined, + failingCheckName: string, + privateTokens: Set, +): { + redactedDiff: string + redactedLog: string + redactedCheckName: string + hasPrivateLeak: boolean + hasResidualSecret: boolean +} { + const rawLog = logExcerpt ?? '' + + // Step 1: redact structural secrets (paths, hostnames, Bearer tokens) + const redactedDiff = redactLogDiffSecrets(diffExcerpt) + const redactedLog = rawLog === '' ? rawLog : redactLogDiffSecrets(rawLog) + const redactedCheckName = redactLogDiffSecrets(failingCheckName) + const combinedText = `${redactedDiff}\n${redactedLog}\n${redactedCheckName}` + + // Step 2: check for private-name leak or residual hard secret after redaction + const hasPrivateLeak = learningBodyHasPrivateLeak(combinedText, privateTokens) + const hasResidualSecret = logDiffHasSecret(combinedText) + + return {redactedDiff, redactedLog, redactedCheckName, hasPrivateLeak, hasResidualSecret} +} + export function buildCandidateDigest(input: BuildCandidateDigestInput): CandidateDigest { // Within-run dedup: a PR that matched more than one trigger (e.g. review-heavy AND // ci-fail-then-pass) appears once per source in mergedPrs. Collapse to one candidate per - // mergeSha so a single PR yields a single learning-proposal (R4). Precedence: review-heavy - // wins — review prose is the richer signal when a PR matched both. + // mergeSha so a single PR yields a single learning-proposal. + // + // When a SHA has BOTH a review-heavy and a ci-fail-then-pass record, the ReviewCandidate + // SURVIVES with the CiFixCandidate's evidence ATTACHED to its `ciFix` field. This + // preserves the richest signal (review prose) while keeping the ci-fix evidence so the + // floor can reserve a slot for it. The review candidate keeps its original position in + // the output (freshness order preserved). + // + // Same-trigger duplicates (two review or two ci-fix same SHA) keep existing behavior: + // first-seen wins (the Map only updates when review-heavy takes precedence over ci-fix). + // Unique SHAs are unchanged. const byMergeSha = new Map() + // Track ci-fix candidates separately so we can attach their evidence to review candidates. + const ciFixBySha = new Map() + for (const pr of input.mergedPrs) { + // Record the first ci-fix candidate per SHA for potential attachment to a review candidate. + if (pr.trigger === 'ci-fail-then-pass' && !ciFixBySha.has(pr.mergeSha)) { + ciFixBySha.set(pr.mergeSha, pr) + } + const existing = byMergeSha.get(pr.mergeSha) if (existing === undefined || (existing.trigger !== 'review-heavy' && pr.trigger === 'review-heavy')) { byMergeSha.set(pr.mergeSha, pr) } } - const deduped = [...byMergeSha.values()] + + // Attach ci-fix evidence to any review-heavy candidate that has a matching ci-fix record. + const deduped: Candidate[] = [] + for (const candidate of byMergeSha.values()) { + if (candidate.trigger === 'review-heavy') { + const ciFixRecord = ciFixBySha.get(candidate.mergeSha) + if (ciFixRecord === undefined) { + // Pure review-heavy candidate — no ci-fix counterpart + deduped.push(candidate) + } else { + // Attach the ci-fix evidence to the surviving review candidate + const ciFix: CiFixEvidence = { + failingCheckName: ciFixRecord.failingCheckName, + diffExcerpt: ciFixRecord.diffExcerpt, + ...(ciFixRecord.logExcerpt === undefined ? {} : {logExcerpt: ciFixRecord.logExcerpt}), + } + deduped.push({...candidate, ciFix}) + } + } else { + deduped.push(candidate) + } + } // drop already-proposed merge SHAs const afterSeenDedup = deduped.filter(pr => !input.openedLearningShas.has(pr.mergeSha)) @@ -377,6 +517,11 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // review-heavy candidates can't starve the ci-fail-then-pass trigger. const capped = selectWithCiFixFloor(afterSolutionsDedup, input.maxLearnings, CIFIX_FLOOR) + // Count dual-trigger candidates: review-heavy candidates that carry attached ci-fix + // evidence (the PR drew review AND failed then fixed CI). Counted on the pre-scan capped set, + // so a candidate whose evidence is later dropped still counts as dual-trigger. + const dualTriggerCandidates = capped.filter(c => c.trigger === 'review-heavy' && c.ciFix !== undefined).length + // upstream privacy scan: scan each candidate's evidence text fail-closed. // // Privacy-ordering invariant: scan the already-truncated evidence text (truncation @@ -387,41 +532,86 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // For ReviewCandidate: // 1. Redact structural secrets (paths, hostnames, Bearer tokens) via redactLogDiffSecrets. // 2. If learningBodyHasPrivateLeak (private repo name) OR logDiffHasSecret (hard secret) - // still true after redaction → clear reviewExcerpts + increment enrichmentBlockedBySecret. + // still true after redaction → clear reviewExcerpts + increment the appropriate counter. // 3. Otherwise use the redacted excerpts. + // 4. If ciFix is attached, scan it INDEPENDENTLY (same redact-then-drop-on-residual pattern). + // A hit in ciFix drops only ciFix; a hit in reviewExcerpts drops only reviewExcerpts. + // Both fields are scanned every time — a private name in both is caught in both. // - // For CiFixCandidate (Unit 3 placeholder): - // Evidence fields are not yet populated; no scan needed until Unit 3 adds them. + // For CiFixCandidate: + // Evidence fields are scanned and redacted (diffExcerpt + logExcerpt + failingCheckName). let enrichmentBlocked = 0 let enrichmentBlockedBySecret = 0 const candidates = capped.map(pr => { if (pr.trigger === 'review-heavy') { - if (pr.reviewExcerpts.length === 0) return pr - - // Step 1: redact structural secrets from the already-truncated excerpts - const redactedExcerpts = pr.reviewExcerpts.map(e => redactLogDiffSecrets(e)) - const redactedText = redactedExcerpts.join('\n') - - // Step 2: check for private-name leak or residual hard secret after redaction - const hasPrivateLeak = learningBodyHasPrivateLeak(redactedText, input.privateTokens) - const hasResidualSecret = logDiffHasSecret(redactedText) - - if (hasPrivateLeak) { - // Private-name hit: clear enriched content, keep candidate title-only - enrichmentBlocked++ - return {...pr, reviewExcerpts: []} + // --- reviewExcerpts scan (existing) --- + let resultPr: ReviewCandidate = pr + + if (pr.reviewExcerpts.length > 0) { + // Step 1: redact structural secrets from the already-truncated excerpts + const redactedExcerpts = pr.reviewExcerpts.map(e => redactLogDiffSecrets(e)) + const redactedText = redactedExcerpts.join('\n') + + // Step 2: check for private-name leak or residual hard secret after redaction + const hasPrivateLeak = learningBodyHasPrivateLeak(redactedText, input.privateTokens) + const hasResidualSecret = logDiffHasSecret(redactedText) + + if (hasPrivateLeak) { + // Private-name hit: clear enriched content, keep candidate title-only + enrichmentBlocked++ + resultPr = {...pr, reviewExcerpts: []} + } else if (hasResidualSecret) { + // Hard-secret residual after redaction: clear enriched content + enrichmentBlockedBySecret++ + resultPr = {...pr, reviewExcerpts: []} + } else { + // Step 3: use the redacted excerpts (structural secrets replaced with [REDACTED]) + resultPr = {...pr, reviewExcerpts: redactedExcerpts} + } } - if (hasResidualSecret) { - // Hard-secret residual after redaction: clear enriched content - enrichmentBlockedBySecret++ - return {...pr, reviewExcerpts: []} + + // --- attached ciFix scan (INDEPENDENT of reviewExcerpts scan) --- + // Scanned every time ciFix is present, regardless of the reviewExcerpts result. + // A hit in ciFix drops only ciFix; a hit in reviewExcerpts drops only reviewExcerpts. + if (resultPr.ciFix !== undefined) { + const {redactedDiff, redactedLog, redactedCheckName, hasPrivateLeak, hasResidualSecret} = scanCiFixEvidence( + resultPr.ciFix.diffExcerpt, + resultPr.ciFix.logExcerpt, + resultPr.ciFix.failingCheckName, + input.privateTokens, + ) + + if (hasPrivateLeak) { + // Private-name hit in ciFix: clear ciFix evidence, keep reviewExcerpts result + enrichmentBlocked++ + resultPr = { + ...resultPr, + ciFix: {failingCheckName: '[REDACTED]', diffExcerpt: ''}, + } + } else if (hasResidualSecret) { + // Hard-secret residual in ciFix: clear ciFix evidence + enrichmentBlockedBySecret++ + resultPr = { + ...resultPr, + ciFix: {failingCheckName: '[REDACTED]', diffExcerpt: ''}, + } + } else { + // Step 3: use the redacted values + const redactedCiFix: CiFixEvidence = { + failingCheckName: redactedCheckName, + diffExcerpt: redactedDiff, + } + if (redactedLog !== '') { + redactedCiFix.logExcerpt = redactedLog + } + resultPr = {...resultPr, ciFix: redactedCiFix} + } } - // Step 3: use the redacted excerpts (structural secrets replaced with [REDACTED]) - return {...pr, reviewExcerpts: redactedExcerpts} + return resultPr } - // CiFixCandidate: scan and redact diffExcerpt + logExcerpt (R3/R3a). + // CiFixCandidate: scan and redact diffExcerpt + logExcerpt. // // Privacy-ordering invariant: diffExcerpt and logExcerpt are already truncated // to budget in the I/O shell before this pure core is called. Never move @@ -434,18 +624,12 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // increment the appropriate counter. // 3. Otherwise use the redacted values. if (pr.trigger === 'ci-fail-then-pass') { - const rawDiff = pr.diffExcerpt - const rawLog = pr.logExcerpt ?? '' - - // Step 1: redact structural secrets (including failingCheckName) - const redactedDiff = redactLogDiffSecrets(rawDiff) - const redactedLog = rawLog === '' ? rawLog : redactLogDiffSecrets(rawLog) - const redactedCheckName = redactLogDiffSecrets(pr.failingCheckName) - const combinedText = `${redactedDiff}\n${redactedLog}\n${redactedCheckName}` - - // Step 2: check for private-name leak or residual hard secret - const hasPrivateLeak = learningBodyHasPrivateLeak(combinedText, input.privateTokens) - const hasResidualSecret = logDiffHasSecret(combinedText) + const {redactedDiff, redactedLog, redactedCheckName, hasPrivateLeak, hasResidualSecret} = scanCiFixEvidence( + pr.diffExcerpt, + pr.logExcerpt, + pr.failingCheckName, + input.privateTokens, + ) if (hasPrivateLeak) { enrichmentBlocked++ @@ -476,6 +660,7 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat afterSeenDedup: afterSeenDedup.length, afterSolutionsDedup: afterSolutionsDedup.length, emitted: candidates.length, + dualTriggerCandidates, enrichmentBlocked, enrichmentBlockedBySecret, }, @@ -497,6 +682,15 @@ export function applyEnrichmentScanAvailability(candidates: Candidate[], scanAva if (scanAvailable) return candidates return candidates.map(c => { if (c.trigger === 'review-heavy') { + // Clear reviewExcerpts (existing) and also clear any attached ciFix evidence. + // No unscanned evidence of any type reaches the digest when the scan is unavailable. + if (c.ciFix !== undefined) { + return { + ...c, + reviewExcerpts: [] as string[], + ciFix: {failingCheckName: c.ciFix.failingCheckName, diffExcerpt: '', logExcerpt: undefined}, + } + } return {...c, reviewExcerpts: [] as string[]} } if (c.trigger === 'ci-fail-then-pass') { @@ -1668,7 +1862,7 @@ async function main(): Promise { // If the scan is unavailable, clear all enriched evidence before passing to the pure core. // This ensures no unscanned prose reaches the digest under any path. // applyEnrichmentScanAvailability handles both ReviewCandidate (clears reviewExcerpts) - // and CiFixCandidate (clears diffExcerpt + logExcerpt) — Unit 3 verified this. + // and CiFixCandidate (clears diffExcerpt + logExcerpt). const safeCandidates = applyEnrichmentScanAvailability(allCandidates, enrichmentScanAvailable) const digest = buildCandidateDigest({ @@ -1699,6 +1893,7 @@ async function main(): Promise { afterSeenDedup: 0, afterSolutionsDedup: 0, emitted: 0, + dualTriggerCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, },