From 29ae6a70af8d27041a2046e3ef42e3d8b9dc9f51 Mon Sep 17 00:00:00 2001 From: "Marcus R. Brown" Date: Tue, 23 Jun 2026 21:13:51 -0700 Subject: [PATCH 1/5] docs: brainstorm and plan attaching ci-fix evidence to dual-trigger candidates Live validation showed the failed-then-fixed trigger detects candidates but never authors a proposal: within-run dedup drops the ci-fix candidate before the cap floor runs, and in this repo nearly every failed-then-fixed PR also drew review rounds. Attach the ci-fix evidence to the surviving review candidate instead of dropping it, retarget the floor to candidates that carry ci-fix evidence, and scan every populated evidence field on the final candidate. --- ...-merged-candidate-evidence-requirements.md | 135 +++++++++ ...-001-fix-merged-candidate-evidence-plan.md | 283 ++++++++++++++++++ 2 files changed, 418 insertions(+) create mode 100644 docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md create mode 100644 docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md diff --git a/docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md b/docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md new file mode 100644 index 000000000..752e966fc --- /dev/null +++ b/docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md @@ -0,0 +1,135 @@ +--- +title: 'Merged-candidate evidence for dual-trigger PRs' +date: 2026-06-23 +status: ready +scope: standard +kind: requirements +parent: docs/brainstorms/2026-06-22-c2-failed-then-fixed-capture-requirements.md +--- + +# Merged-candidate evidence for dual-trigger PRs + +## Purpose + +Fix a delivery bug exposed by live validation of the failed-then-fixed (ci-fix) capture +trigger: a PR that matched both triggers loses its ci-fix evidence before it can be authored. +Attach the ci-fix evidence to the review candidate instead of dropping it, so the +most-informative PRs — those that both drew review rounds and broke then fixed CI — yield one +proposal grounded in both signals. + +## The vision in one sentence + +When a PR both drew substantive review and failed-then-fixed CI, Fro Bot learns from the whole +story — what the reviewers flagged and what broke and got fixed — in a single proposal. + +## Problem frame + +Live validation proved the ci-fix trigger detects correctly (real candidates, real fixing +diffs, privacy gate clean) but never authored a proposal. Root cause: `buildCandidateDigest` +runs within-run dedup (one candidate per merge SHA, review-heavy precedence) *before* the cap +floor. In this repo nearly every substantive PR gets ≥2 Fro Bot review rounds, so a +failed-then-fixed PR is almost always also review-heavy; within-run dedup collapses it to +review-heavy and discards the ci-fix evidence, leaving the floor nothing to reserve. The two +mechanisms — R4 within-run dedup (drop, review-heavy wins) and the ci-fix cap floor — fight +each other. + +The fix reframes the conflict: a PR matching both triggers is not a tiebreak to arbitrate, it +is the **richest** learning source. Attach the ci-fix evidence to the surviving review +candidate instead of dropping it, so one proposal carries both signals. + +## Requirements + +- R1. Keep the discriminated `Candidate` union (`ReviewCandidate | CiFixCandidate`) — `trigger` + stays the branch key, preserving existing narrowing, privacy branches, emitters, and tests. + Add one optional field to `ReviewCandidate`: `ciFix?` (`failingCheckName`, `diffExcerpt`, + `logExcerpt?`). A `ReviewCandidate` may now carry attached ci-fix evidence; `CiFixCandidate` + remains for pure-ci-fix PRs (failed CI, no substantive review). This lower-churn shape was + chosen over collapsing the union into optional-everywhere fields, which is more refactor than + the bug needs. +- R2. Within-run collapse for a same-SHA dual-trigger PR becomes an **attach**, not a drop: the + `ReviewCandidate` survives (review prose is the richer base) and the `CiFixCandidate`'s + evidence is attached to its `ciFix` field. Pure single-trigger PRs are unchanged. One proposal + per merge SHA still holds (parent doc R4) — now via attach, not drop, so ci-fix evidence is + no longer discarded. +- R3. The upstream privacy scan runs **per evidence type, independently and fail-closed**, on + the **final merged candidate** (after the same-SHA merge, never on pre-merge fragments — merged + evidence must not bypass the scan). The scan iterates **every populated evidence field every + time**, not only the field where a hit was found: each present evidence type is redacted for + structural secrets first, then dropped if a private-repo-name or residual hard-secret survives. + A hit in one evidence type drops only that type; clean evidence of the other survives. Because + every populated type is always scanned, a private identifier appearing in both diff and review + prose is caught in both. No unscanned evidence of any type reaches the agent. +- R3a. **Serialization allowlist.** Only the allowlisted evidence/identifier fields may survive + into the digest the agent sees and the public issue. Absent evidence fields carry no metadata + forward; no raw PR object, owner/repo/number/title, or stale fragment may ride along on the + candidate. The authored body is built only from already-scanned evidence, and the open step's + body scan remains the final backstop. +- R4. The agent prompt has one branch that describes whatever evidence is present and instructs + the agent to distill from both when both exist (review prose + the failure→fix story). +- R5. The cap floor (`selectWithCiFixFloor`) stays, retargeted to reserve slots for candidates + that **carry ci-fix evidence** — a pure `CiFixCandidate` OR a `ReviewCandidate` with a `ciFix` + field attached (a `hasCiFixEvidence(candidate)` helper) — so ci-fix learnings are guaranteed + authoring slots regardless of the review-heavy backlog. The floor runs on the merged candidate set + **before** the privacy scan drops any evidence, so a candidate's floor eligibility is decided + by the ci-fix evidence it was harvested with, not by whether that evidence later survived + redaction. (A candidate selected by the floor whose ci-fix evidence is then dropped on a + privacy hit still proceeds — title-only or on its surviving review evidence — exactly as a + privacy hit behaves today.) +- R6. Telemetry stays counts-only: per-evidence-type blocked counters and a count of merged + (dual-trigger) candidates. "Merged" is counted by candidates carrying both evidence types; + review/ci-fix membership derives from which evidence fields are present (no separate + double-counted trigger tally). + +## Scope boundaries + +- Keep the discriminated union; attach `ciFix` evidence to the review candidate on a same-SHA + dual-trigger collapse. +- No change to the propose-only model, dedup-by-merge-SHA (still one proposal per SHA), the + per-run cap, the secret-scan pattern set, or the seen-set / solutions dedup behavior. +- Pure-review and pure-ci-fix PRs still produce single-evidence candidates — the merge only + changes the overlap case. + +### Deferred + +- C3 (issue triage), C4 (cross-run) — unchanged from the parent, still deferred. +- Any change to per-trigger cap budgeting beyond the existing floor. + +## Open questions (for planning) + +- **Q2 — Merge mechanics + ordering.** Where the same-SHA merge runs (it must run first, before + seen-set and solutions dedup, so later stages see the merged candidate), how freshness order + is preserved for the merged record, and the field-collision rules: `signals` when the two + records differ (review vs ci-fix harvested distinct token sets), and how a merged record's + evidence is assembled when one side was already empty. +- **Q5 — Prompt evidence presentation.** How the prompt presents both evidence sets so the + agent distills a unified learning rather than two stapled-together ones. + +(Resolved by the requirements, not open: the union is KEPT, with `ciFix?` attached to +`ReviewCandidate` (R1) — not a union collapse; the floor keys on `hasCiFixEvidence` covering +both the attached and pure cases (R5); the scan runs on the final candidate over every populated +evidence field incl. an attached `ciFix` (R3). Refactor posture: characterization-first to lock +current single-evidence behavior before adding the attach path.) + +## Success criteria + +- **SC1** — A PR that matched both triggers yields ONE candidate carrying both `reviewExcerpts` + and `ciFix` evidence (not collapsed to one source). +- **SC2** — A private-name or secret in the diff drops only the ci-fix evidence; clean review + prose on the same candidate survives (and vice versa). Mutation-proven per evidence type. +- **SC3** — The cap floor reserves a slot for a candidate carrying ci-fix evidence even when it + also carries review evidence and review-heavy candidates fill the rest. +- **SC4** — A live capture run authors a proposal grounded in ci-fix evidence (the bug this doc + fixes): `emitted` includes a candidate with ci-fix evidence, and the opened proposal + references the fixing diff. +- **SC5** — Pure-review and pure-ci-fix candidates still behave as before (characterization). + +## Sources & references + +- Parent: docs/brainstorms/2026-06-22-c2-failed-then-fixed-capture-requirements.md +- Plan: docs/plans/2026-06-22-004-feat-c2-failed-then-fixed-capture-plan.md +- The validation finding: capture run 28069117571 emitted 0 ci-fix candidates despite + detecting 3, because within-run dedup ran before the floor (memory of the diagnosis). +- Code: scripts/capture-learnings-harvest.ts (`Candidate` union, `buildCandidateDigest` + within-run dedup + `selectWithCiFixFloor`, `applyEnrichmentScanAvailability`), + scripts/capture-learnings-privacy.ts (the shared gate), scripts/capture-learnings-open.ts + (title/marker), .github/workflows/capture-learnings.yaml (the prompt) diff --git a/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md b/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md new file mode 100644 index 000000000..6e38e4786 --- /dev/null +++ b/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md @@ -0,0 +1,283 @@ +--- +title: 'fix: attach ci-fix evidence to dual-trigger candidates' +type: fix +status: active +date: 2026-06-23 +origin: docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md +--- + +# fix: attach ci-fix evidence to dual-trigger candidates + +## Overview + +A PR that matched both capture triggers loses its ci-fix evidence: within-run dedup runs before +the cap floor and drops the ci-fix candidate (review-heavy wins), so the floor has nothing to +reserve and a failed-then-fixed learning is never authored. Fix it by attaching the ci-fix +evidence to the surviving review candidate instead of dropping it, retargeting the floor to +"carries ci-fix evidence", and scanning every populated evidence field on the final candidate. + +## Problem Frame + +Live validation (capture run that examined 106 merged PRs) detected 3 ci-fix candidates yet +emitted 0: in this repo nearly every substantive PR also gets ≥2 review rounds, so the +review-heavy precedence in within-run dedup collapses every dual-trigger PR to review-heavy +before `selectWithCiFixFloor` runs. The dedup and the floor fight each other. The dual-trigger +PR is the richest learning source, so attach both evidence sets rather than arbitrate +(see origin). + +## Requirements Trace + +- R1. Keep the discriminated union; add optional `ciFix?` evidence to `ReviewCandidate` (origin R1). +- R2. Within-run collapse attaches the ci-fix evidence to the surviving review candidate instead + of dropping it; one proposal per merge SHA still holds (origin R2). +- R3. The privacy scan runs on the final candidate, over every populated evidence field + (`reviewExcerpts` and an attached `ciFix`), each redacted-then-dropped-on-residual + independently (origin R3). +- R3a. Serialization allowlist: only allowlisted evidence/identifier fields reach the digest and + public issue; no raw PR object or stale fragment rides along (origin R3a). +- R4. The agent prompt distills from both evidence sets when both are present (origin R4). +- R5. `selectWithCiFixFloor` reserves slots for candidates that carry ci-fix evidence — a pure + `CiFixCandidate` or a `ReviewCandidate` with `ciFix` attached, via `hasCiFixEvidence` (origin R5). +- R6. Counts-only telemetry: per-evidence-type blocked counters and a dual-trigger (merged) count + (origin R6). + +## Scope Boundaries + +- Keep the discriminated union; attach `ciFix` to `ReviewCandidate` on same-SHA dual-trigger collapse. +- No change to the propose-only model, dedup-by-merge-SHA, the per-run cap, the secret-scan + pattern set, or the seen-set / solutions dedup. +- Pure-review and pure-ci-fix PRs are unchanged. + +### Deferred to Separate Tasks + +- C3 (issue triage), C4 (cross-run) — unchanged, still deferred. +- Collapsing the union into a fully unified optional-fields candidate — rejected as more churn + than the bug needs (origin). + +## Context & Research + +### Relevant Code and Patterns + +- `scripts/capture-learnings-harvest.ts`: + - `Candidate = ReviewCandidate | CiFixCandidate` — add optional `ciFix?` to `ReviewCandidate`. + - `buildCandidateDigest` within-run dedup (~line 361-367): the `byMergeSha` Map keeps one object + per SHA with review-heavy precedence — change to attach the `CiFixCandidate`'s evidence onto + the surviving `ReviewCandidate`. + - The privacy branches (~line 385-413 ReviewCandidate, ~line 385-413 CiFixCandidate): a + `ReviewCandidate` with `ciFix` must now scan BOTH `reviewExcerpts` and the attached `ciFix` + evidence, each independently, on the final candidate. + - `selectWithCiFixFloor` (~line 333): change the predicate from `trigger === 'ci-fail-then-pass'` + to a `hasCiFixEvidence(candidate)` helper covering the attached and pure cases. + - `applyEnrichmentScanAvailability`: when the scan is unavailable, also clear an attached `ciFix` + on a `ReviewCandidate` (today it clears `reviewExcerpts` for review and diff/log for ci-fix). +- `scripts/capture-learnings-open.ts`: title/marker work off `mergeSha` — unaffected by the attach. +- `.github/workflows/capture-learnings.yaml`: the agent prompt — the review branch must mention + the optional attached ci-fix evidence. + +### Institutional Learnings + +- `docs/solutions/best-practices/pure-core-privacy-gates-shared-module-2026-06-22.md` — the scan + stays pure-core, mutation-proven; scanning the final candidate over all fields preserves the + two-chokepoint fail-closed property. +- `docs/solutions/security-issues/verify-whole-public-perimeter-2026-06-22.md` — the attached + evidence is a new field on the public-facing candidate; the allowlist (R3a) keeps the perimeter closed. + +## Key Technical Decisions + +- **Attach, don't collapse.** Keep `trigger` as the branch key; add `ciFix?` to `ReviewCandidate`. + Preserves existing narrowing, privacy branches, emitters, and most tests. Lower churn than a + unified optional-fields candidate, same outcome. +- **Attach runs in within-run dedup, before seen/solutions dedup and the floor**, so every later + stage sees the merged candidate. Freshness order preserved (the review candidate keeps its + position). +- **`hasCiFixEvidence(candidate)` helper** — true for a `CiFixCandidate` or a `ReviewCandidate` + with a non-empty `ciFix`. Used by the floor; defines "carries ci-fix evidence" in one place. +- **Scan all populated evidence on the final candidate.** A `ReviewCandidate` with `ciFix` scans + `reviewExcerpts` AND the attached diff/log independently; a hit in one drops only that one. The + floor decides eligibility on harvested evidence, before any privacy drop (origin R5). + +## Open Questions + +### Resolved During Planning + +- Shape → attach `ciFix?` to `ReviewCandidate`, keep the union (origin). +- Privacy on the merged candidate → scan every populated field on the final candidate, per-type + independent drop (origin R3). +- Floor predicate → `hasCiFixEvidence` (origin R5). + +### Deferred to Implementation + +- The exact `ciFix` field name/shape on `ReviewCandidate` (reuse the `CiFixCandidate` evidence + fields verbatim vs a nested object) — pick the minimal-diff form during implementation. +- Whether `applyEnrichmentScanAvailability` needs a new branch or a tweak to the existing review + branch to also clear an attached `ciFix`. +- The exact telemetry field name for the dual-trigger count. + +## High-Level Technical Design + +> *Directional guidance for review, not implementation specification.* + +``` +within-run dedup (buildCandidateDigest): + for each candidate by mergeSha: + same SHA, one ReviewCandidate + one CiFixCandidate + → keep the ReviewCandidate, set reviewCandidate.ciFix = ciFixCandidate's evidence (ATTACH) + same SHA, two of the same trigger → existing behavior + unique SHA → unchanged + ↓ (seen-set dedup, solutions dedup — unchanged) +selectWithCiFixFloor(ordered, cap, floor): + reserve = filter(hasCiFixEvidence) // was: trigger === 'ci-fail-then-pass' + ...unchanged... + ↓ +privacy scan (per candidate, final): + if ReviewCandidate: + scan reviewExcerpts (redact → drop-on-residual) // existing + if candidate.ciFix: scan ciFix.diff + ciFix.log (independently, same way) // NEW + if CiFixCandidate: scan ciFix evidence // existing + ↓ opaque digest (allowlisted fields only) +agent prompt: review branch notes the optional attached failure→fix evidence +``` + +## Implementation Units + +- [ ] **Unit 1: Attach ci-fix evidence in within-run dedup + `hasCiFixEvidence` + floor predicate** + +**Goal:** A same-SHA dual-trigger PR keeps its ci-fix evidence (attached to the review candidate), +and the floor reserves slots for any candidate carrying ci-fix evidence. + +**Requirements:** R1, R2, R5. + +**Dependencies:** None. + +**Files:** +- Modify: `scripts/capture-learnings-harvest.ts` +- Test: `scripts/capture-learnings-harvest.test.ts` + +**Approach:** +- Add optional `ciFix?` (the `CiFixCandidate` evidence: `failingCheckName`, `diffExcerpt`, + `logExcerpt?`) to `ReviewCandidate`. Change the within-run dedup so when a SHA has both a review + and a ci-fix record, the review candidate survives with `ciFix` attached. Add + `hasCiFixEvidence(candidate)` and use it in `selectWithCiFixFloor` instead of the trigger check. + +**Execution note:** Characterization-first — lock the current single-trigger dedup + floor behavior +before changing it. + +**Patterns to follow:** the existing `byMergeSha` dedup, `selectWithCiFixFloor`, the `CiFixCandidate` +evidence fields. + +**Test scenarios:** +- Characterization: a pure-review PR and a pure-ci-fix PR (distinct SHAs) still produce their + current single-evidence candidates; the floor still reserves the pure-ci-fix one. +- Happy: a same-SHA review+ci-fix pair → one `ReviewCandidate` with `ciFix` attached (the + regression for the live bug). Assert the ci-fix evidence is present on the survivor. +- Floor: a dual-trigger candidate (review + attached ci-fix) is reserved by the floor even when + review-heavy candidates fill the rest — the exact production starvation, now fixed. +- Edge: `hasCiFixEvidence` true for pure ci-fix and for review-with-ciFix, false for pure review. +- Edge: two review records same SHA, or two ci-fix records same SHA → existing dedup unchanged. + +**Verification:** gates green; the dual-trigger PR retains ci-fix evidence and the floor reserves it. + +- [ ] **Unit 2: Scan attached ci-fix evidence in the privacy step** + +**Goal:** A `ReviewCandidate` with attached `ciFix` has BOTH its review prose and its diff/log +scanned independently, fail-closed, on the final candidate. + +**Requirements:** R3, R3a. + +**Dependencies:** Unit 1. + +**Files:** +- Modify: `scripts/capture-learnings-harvest.ts` +- Modify: `scripts/capture-learnings-harvest.test.ts` + +**Approach:** +- In `buildCandidateDigest`'s ReviewCandidate privacy branch, after scanning `reviewExcerpts`, also + scan an attached `ciFix` (redact structural secrets, then drop the attached `ciFix` on a residual + private-name or hard-secret hit) — independently from the review-prose result. Update + `applyEnrichmentScanAvailability` so an unavailable scan also clears an attached `ciFix`. Keep the + serialization allowlist: only allowlisted fields reach the digest. + +**Execution note:** Test-first; the cross-field privacy mutation proof is load-bearing. + +**Patterns to follow:** the existing ReviewCandidate and CiFixCandidate privacy branches (redact → +drop-on-residual), the truncate-then-scan ordering invariant. + +**Test scenarios:** +- Happy: a dual candidate with clean review prose + clean diff → both survive, redacted. +- Privacy (independent drop): a secret in the attached diff drops ONLY the `ciFix`; clean + `reviewExcerpts` survive. And the reverse (private name in review prose drops only review). +- Privacy (cross-field): a private name in BOTH review prose and the attached diff → caught in + both (every populated field scanned every time). Mutation-proven: bypassing the attached-ciFix + scan lets the diff secret reach the digest → test fails. +- Scan-unavailable: `applyEnrichmentScanAvailability(false)` clears reviewExcerpts AND attached ciFix. +- Allowlist: the emitted dual candidate carries only allowlisted keys (no owner/repo/number/title). + +**Verification:** gates green; no unscanned attached evidence reaches the digest; mutation proof bites. + +- [ ] **Unit 3: Prompt + telemetry for dual-trigger candidates** + +**Goal:** The agent distills from both evidence sets when present; telemetry counts dual-trigger +candidates and per-evidence-type blocks. + +**Requirements:** R4, R6. + +**Dependencies:** Unit 2. + +**Files:** +- Modify: `.github/workflows/capture-learnings.yaml` +- Modify: `scripts/capture-learnings-harvest.ts` (telemetry counters) +- Modify: `scripts/capture-learnings-harvest.test.ts` + +**Approach:** +- Update the agent prompt's review branch to note that a review candidate may also carry the + failure→fix evidence (`ciFix`), and instruct distilling from both when present. Add a + dual-trigger (merged) count and ensure the per-evidence-type blocked counters + (`enrichmentBlocked`, `enrichmentBlockedBySecret`) cover the attached-ciFix drops. Surface the + dual-trigger count in the step summary. + +**Test scenarios:** +- Telemetry: a run with one dual-trigger candidate increments the merged count; a per-type drop + increments the right blocked counter. + +**Test expectation for the YAML prompt:** none — prompt wording, validated by actionlint + a manual +dispatch showing a dual-trigger proposal that cites the fixing diff. + +**Verification:** actionlint clean; a manual capture run authors a proposal grounded in both the +review prose and the fixing diff (the live proof the original bug is fixed). + +## System-Wide Impact + +- **Interaction graph:** the attach changes only the within-run dedup branch; seen/solutions dedup, + cap, floor (predicate swap), and privacy (added attached-field scan) are the touch points. The + open step and emitters are unaffected. +- **Error propagation:** privacy scan stays fail-closed per evidence field; an attached-ciFix hit + drops only that field. +- **State lifecycle risks:** none new — no persisted state; one proposal per merge SHA preserved. +- **API surface parity:** the shared privacy module is unchanged (the secret/redaction functions + already exist); both chokepoints still use it. +- **Unchanged invariants:** propose-only model, merge-SHA marker, `learning-proposal` label, the + per-run cap, opacity (allowlisted fields only), the discriminated union itself. + +## Risks & Dependencies + +| Risk | Mitigation | +|------|------------| +| Attached ci-fix evidence bypasses the scan | R3: scan the final candidate over every populated field; mutation-proven | +| Floor predicate misses attached evidence | `hasCiFixEvidence` covers both pure and attached; tested | +| Refactor breaks current single-trigger behavior | Characterization-first locks pure-review/pure-ci-fix before the change | +| Empty-but-present `ciFix` confuses `hasCiFixEvidence` | Define "carries" precisely (non-empty diff); test the empty/absent boundary | + +## Documentation / Operational Notes + +- After it lands, dispatch a capture run to confirm a dual-trigger proposal opens grounded in the + fixing diff — the live proof the delivery bug is fixed (SC4). + +## Sources & References + +- **Origin document:** docs/brainstorms/2026-06-23-merged-candidate-evidence-requirements.md +- Parent: docs/brainstorms/2026-06-22-c2-failed-then-fixed-capture-requirements.md, + docs/plans/2026-06-22-004-feat-c2-failed-then-fixed-capture-plan.md +- Code: scripts/capture-learnings-harvest.ts, scripts/capture-learnings-privacy.ts, + scripts/capture-learnings-open.ts, .github/workflows/capture-learnings.yaml +- Learnings: docs/solutions/best-practices/pure-core-privacy-gates-shared-module-2026-06-22.md, + docs/solutions/security-issues/verify-whole-public-perimeter-2026-06-22.md From ece9b4e33e17f99cabf0522e10025a187aa88a45 Mon Sep 17 00:00:00 2001 From: "Marcus R. Brown" Date: Tue, 23 Jun 2026 21:26:04 -0700 Subject: [PATCH 2/5] fix(capture): attach ci-fix evidence to dual-trigger candidates instead of dropping it When a PR matched both triggers, within-run dedup dropped the failed-then-fixed candidate, so the cap floor had nothing to reserve and the learning was never authored. Attach the ci-fix evidence to the surviving review candidate, and key the floor on whether a candidate carries ci-fix evidence (pure or attached) so those candidates are guaranteed a slot. The review candidate keeps its position, so freshness order is preserved. --- scripts/capture-learnings-harvest.test.ts | 437 +++++++++++++++++++++- scripts/capture-learnings-harvest.ts | 103 ++++- 2 files changed, 529 insertions(+), 11 deletions(-) diff --git a/scripts/capture-learnings-harvest.test.ts b/scripts/capture-learnings-harvest.test.ts index 109224346..998842487 100644 --- a/scripts/capture-learnings-harvest.test.ts +++ b/scripts/capture-learnings-harvest.test.ts @@ -21,6 +21,7 @@ import { FRO_BOT_REVIEWER_LOGINS, harvestCandidates, harvestCiFixCandidates, + hasCiFixEvidence, LEARNING_PROPOSAL_LABEL, MAX_EXCERPT_CHARS_PER_CANDIDATE, parseMergeShaMarker, @@ -191,18 +192,21 @@ describe('buildCandidateDigest', () => { }) describe('opacity guarantee', () => { - it('review-heavy candidate has ONLY trigger, mergeSha, reviewRounds, signals, reviewExcerpts — no owner/repo/number/title', () => { - // #given a review-heavy candidate + it('review-heavy candidate has ONLY allowed keys — no owner/repo/number/title', () => { + // #given a review-heavy candidate (pure review, no ci-fix) const input = makeDigestInput() // #when building the digest const result = buildCandidateDigest(input) - // #then each review-heavy candidate has exactly the allowed keys + // #then each review-heavy candidate has only the allowed keys + // ciFix is optional — present only when the PR also matched ci-fail-then-pass + const baseAllowedKeys = ['mergeSha', 'reviewExcerpts', 'reviewRounds', 'signals', 'trigger'].sort() + const allowedKeysWithCiFix = [...baseAllowedKeys, 'ciFix'].sort() for (const candidate of result.candidates) { if (candidate.trigger !== 'review-heavy') continue const keys = Object.keys(candidate).sort() - expect(keys).toEqual(['mergeSha', 'reviewExcerpts', 'reviewRounds', 'signals', 'trigger'].sort()) + expect([baseAllowedKeys, allowedKeysWithCiFix]).toContainEqual(keys) // Explicitly assert forbidden keys are absent expect(candidate).not.toHaveProperty('owner') expect(candidate).not.toHaveProperty('repo') @@ -618,7 +622,7 @@ describe('buildCandidateDigest — CiFixCandidate union support', () => { expect(result.telemetry.afterSeenDedup).toBe(0) }) - it('within-run dedup (R4): same SHA from both triggers yields one candidate, review-heavy wins', () => { + it('within-run dedup (R4): same SHA from both triggers yields one candidate, review-heavy wins WITH ci-fix attached', () => { // #given a review-heavy and a ci-fix candidate with the SAME mergeSha, NOT yet proposed // (a PR that matched both triggers in this run) const sha = 'bothtrg1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' @@ -633,6 +637,429 @@ describe('buildCandidateDigest — CiFixCandidate union support', () => { // #then exactly one candidate is emitted (R4), and it is the review-heavy one expect(result.candidates).toHaveLength(1) expect(result.candidates[0]?.trigger).toBe('review-heavy') + // #then the ci-fix evidence is ATTACHED to the surviving review candidate (R2) + // (not dropped — the dual-trigger PR is the richest learning source) + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix).toBeDefined() + expect(result.candidates[0].ciFix?.failingCheckName).toBe(ciFixCandidate.failingCheckName) + expect(result.candidates[0].ciFix?.diffExcerpt).toBe(ciFixCandidate.diffExcerpt) + } + }) +}) + +// --------------------------------------------------------------------------- +// CHARACTERIZATION: pure-review and pure-ci-fix single-trigger behavior is unchanged +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — CHARACTERIZATION: single-trigger behavior unchanged', () => { + it('CHARACTERIZATION: pure-review PR (distinct SHA) still produces a ReviewCandidate with no ciFix', () => { + // #given a pure review-heavy candidate with a unique SHA (no ci-fix counterpart) + // This locks the current behavior: pure-review PRs are unaffected by the dual-trigger attach. + const sha = 'purereview1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const reviewCandidate = makeCandidate({ + mergeSha: sha, + reviewRounds: 3, + reviewExcerpts: ['Fix the null check.'], + }) + const input = makeDigestInput({mergedPrs: [reviewCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is emitted + expect(result.candidates).toHaveLength(1) + // #then it is a ReviewCandidate with the expected fields + expect(result.candidates[0]?.trigger).toBe('review-heavy') + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].mergeSha).toBe(sha) + expect(result.candidates[0].reviewRounds).toBe(3) + expect(result.candidates[0].reviewExcerpts).toEqual(['Fix the null check.']) + // #then NO ciFix is attached (pure review — no ci-fix counterpart) + expect(result.candidates[0].ciFix).toBeUndefined() + } + }) + + it('CHARACTERIZATION: pure-ci-fix PR (distinct SHA) still produces a CiFixCandidate unchanged', () => { + // #given a pure ci-fix candidate with a unique SHA (no review counterpart) + // This locks the current behavior: pure-ci-fix PRs are unaffected by the dual-trigger attach. + const sha = 'purefix001aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const ciFixCandidate = makeCiFixCandidate({ + mergeSha: sha, + failingCheckName: 'CI / lint', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }) + const input = makeDigestInput({mergedPrs: [ciFixCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is emitted + expect(result.candidates).toHaveLength(1) + // #then it is a CiFixCandidate with the expected fields + expect(result.candidates[0]?.trigger).toBe('ci-fail-then-pass') + if (result.candidates[0]?.trigger === 'ci-fail-then-pass') { + expect(result.candidates[0].mergeSha).toBe(sha) + expect(result.candidates[0].failingCheckName).toBe('CI / lint') + expect(result.candidates[0].diffExcerpt).toContain('-bad') + } + }) + + it('CHARACTERIZATION: selectWithCiFixFloor still reserves a slot for a pure-ci-fix candidate', () => { + // #given the production starvation scenario with a pure ci-fix candidate + // This locks the floor behavior for pure-ci-fix candidates (unchanged by this unit). + const sha = (n: number): string => `${n}`.padStart(40, '0') + const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const pureCiFix = makeCiFixCandidate({mergeSha: sha(99)}) + const ordered = [...reviewHeavy, pureCiFix] + + // #when selecting with cap=5, floor=1 + const selected = selectWithCiFixFloor(ordered, 5, 1) + + // #then the pure ci-fix candidate is reserved (unchanged behavior) + expect(selected).toHaveLength(5) + expect(selected).toContain(pureCiFix) + expect(selected.filter(c => c.trigger === 'review-heavy')).toHaveLength(4) + }) +}) + +// --------------------------------------------------------------------------- +// Unit 1: hasCiFixEvidence helper +// --------------------------------------------------------------------------- + +describe('hasCiFixEvidence', () => { + it('returns true for a pure CiFixCandidate', () => { + // #given a pure ci-fix candidate + const candidate = makeCiFixCandidate() + // #then hasCiFixEvidence is true + expect(hasCiFixEvidence(candidate)).toBe(true) + }) + + it('returns true for a ReviewCandidate with a non-empty ciFix.diffExcerpt', () => { + // #given a review candidate with attached ci-fix evidence (non-empty diff) + const candidate: ReviewCandidate = { + ...makeCandidate(), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }, + } + // #then hasCiFixEvidence is true + expect(hasCiFixEvidence(candidate)).toBe(true) + }) + + it('returns false for a pure ReviewCandidate (no ciFix)', () => { + // #given a pure review candidate with no ciFix field + const candidate = makeCandidate() + // #then hasCiFixEvidence is false + expect(hasCiFixEvidence(candidate)).toBe(false) + }) + + it('returns false for a ReviewCandidate with an empty ciFix.diffExcerpt', () => { + // #given a review candidate with a ciFix field but empty diffExcerpt + // (evidence was cleared, e.g. by the privacy scan) + const candidate: ReviewCandidate = { + ...makeCandidate(), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '', // empty — not substantive + }, + } + // #then hasCiFixEvidence is false (empty diff = no substantive evidence) + expect(hasCiFixEvidence(candidate)).toBe(false) + }) + + it('returns true for a ReviewCandidate with ciFix that has a logExcerpt but non-empty diff', () => { + // #given a review candidate with attached ci-fix evidence including a log excerpt + const candidate: ReviewCandidate = { + ...makeCandidate(), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + logExcerpt: 'Error: test failed at line 42', + }, + } + // #then hasCiFixEvidence is true (non-empty diff is the key signal) + expect(hasCiFixEvidence(candidate)).toBe(true) + }) +}) + +// --------------------------------------------------------------------------- +// Unit 1: dual-trigger attach — the live-bug regression +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — dual-trigger attach (Unit 1 regression)', () => { + it('REGRESSION: same-SHA review+ci-fix pair → ONE ReviewCandidate with ciFix attached', () => { + // #given a review-heavy and a ci-fix candidate with the SAME mergeSha + // This is the live bug: before this fix, the ci-fix evidence was dropped. + const sha = 'dualtrg1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const ciFixCandidate = makeCiFixCandidate({ + mergeSha: sha, + failingCheckName: 'CI / test', + diffExcerpt: '--- a/scripts/foo.ts\n+++ b/scripts/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }) + const reviewCandidate = makeCandidate({ + mergeSha: sha, + reviewRounds: 3, + reviewExcerpts: ['Fix the null check.'], + }) + // ci-fix listed first to prove precedence is by trigger, not array order + const input = makeDigestInput({mergedPrs: [ciFixCandidate, reviewCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly ONE candidate is emitted (one proposal per SHA) + expect(result.candidates).toHaveLength(1) + + // #then the survivor is a ReviewCandidate (review prose is the richer base) + expect(result.candidates[0]?.trigger).toBe('review-heavy') + + // #then the ci-fix evidence IS ATTACHED (not dropped — the regression fix) + if (result.candidates[0]?.trigger === 'review-heavy') { + const emitted = result.candidates[0] + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.failingCheckName).toBe('CI / test') + expect(emitted.ciFix?.diffExcerpt).toContain('-bad') + // #then the review evidence is also preserved + expect(emitted.reviewRounds).toBe(3) + expect(emitted.reviewExcerpts).toEqual(['Fix the null check.']) + } + }) + + it('dual-trigger: review listed first, ci-fix listed second → same attach result', () => { + // #given review-heavy listed BEFORE ci-fix (opposite order from the regression test) + const sha = 'dualtrg2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const reviewCandidate = makeCandidate({mergeSha: sha, reviewRounds: 2}) + const ciFixCandidate = makeCiFixCandidate({ + mergeSha: sha, + failingCheckName: 'CI / lint', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-x\n+y', + }) + const input = makeDigestInput({mergedPrs: [reviewCandidate, ciFixCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then one ReviewCandidate with ciFix attached regardless of input order + expect(result.candidates).toHaveLength(1) + expect(result.candidates[0]?.trigger).toBe('review-heavy') + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix).toBeDefined() + expect(result.candidates[0].ciFix?.failingCheckName).toBe('CI / lint') + } + }) + + it('dual-trigger: logExcerpt is attached when present on the ci-fix candidate', () => { + // #given a ci-fix candidate with a logExcerpt + const sha = 'dualtrg3aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const ciFixCandidate = makeCiFixCandidate({ + mergeSha: sha, + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + logExcerpt: 'Error: test failed at line 42', + }) + const reviewCandidate = makeCandidate({mergeSha: sha}) + const input = makeDigestInput({mergedPrs: [ciFixCandidate, reviewCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the logExcerpt is also attached + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix?.logExcerpt).toBe('Error: test failed at line 42') + } + }) + + it('dual-trigger: logExcerpt is absent when not present on the ci-fix candidate', () => { + // #given a ci-fix candidate WITHOUT a logExcerpt + const sha = 'dualtrg4aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const ciFixCandidate = makeCiFixCandidate({mergeSha: sha}) // no logExcerpt in fixture + const reviewCandidate = makeCandidate({mergeSha: sha}) + const input = makeDigestInput({mergedPrs: [ciFixCandidate, reviewCandidate]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then logExcerpt is absent (not set to undefined explicitly — just not present) + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix?.logExcerpt).toBeUndefined() + } + }) +}) + +// --------------------------------------------------------------------------- +// Unit 1: floor-fix — dual-trigger candidate is reserved by selectWithCiFixFloor +// --------------------------------------------------------------------------- + +describe('selectWithCiFixFloor — floor-fix: dual-trigger candidate is reserved', () => { + const sha = (n: number): string => `${n}`.padStart(40, '0') + + it('FLOOR FIX: a dual-trigger ReviewCandidate (with ciFix) is reserved by the floor even when review-heavy candidates fill the rest', () => { + // #given the production starvation scenario, now with a dual-trigger candidate + // (review-heavy + attached ci-fix) instead of a pure ci-fix candidate. + // Before this fix, the dual-trigger candidate would NOT be reserved (trigger !== 'ci-fail-then-pass'). + const dualTriggerCandidate: ReviewCandidate = { + ...makeCandidate({mergeSha: sha(99)}), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }, + } + const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const ordered = [...reviewHeavy, dualTriggerCandidate] // dual-trigger sorts LAST + + // #when selecting with cap=5, floor=1 + const selected = selectWithCiFixFloor(ordered, 5, 1) + + // #then the dual-trigger candidate IS included (hasCiFixEvidence is true for it) + expect(selected).toHaveLength(5) + expect(selected).toContain(dualTriggerCandidate) + // #then 4 review-heavy candidates fill the rest (pure review, no ciFix) + const pureReviewSelected = selected.filter( + c => c.trigger === 'review-heavy' && !('ciFix' in c && c.ciFix !== undefined), + ) + expect(pureReviewSelected).toHaveLength(4) + }) + + it('FLOOR FIX: a ReviewCandidate with empty ciFix.diffExcerpt is NOT reserved (evidence cleared)', () => { + // #given a review candidate with a ciFix field but empty diffExcerpt (evidence cleared) + const clearedCiFix: ReviewCandidate = { + ...makeCandidate({mergeSha: sha(99)}), + ciFix: { + failingCheckName: 'CI / test', + diffExcerpt: '', // cleared — not substantive + }, + } + const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const ordered = [...reviewHeavy, clearedCiFix] + + // #when selecting with cap=5, floor=1 + const selected = selectWithCiFixFloor(ordered, 5, 1) + + // #then the cleared-ciFix candidate is NOT reserved (hasCiFixEvidence is false) + // It may or may not be included depending on position, but NOT via the floor reservation + // With 8 review-heavy + 1 cleared-ciFix, cap=5 → only the first 5 review-heavy are taken + expect(selected).not.toContain(clearedCiFix) + }) + + it('FLOOR FIX: buildCandidateDigest end-to-end — dual-trigger candidate is reserved by the floor', () => { + // #given the production starvation scenario end-to-end: + // - A same-SHA review+ci-fix pair (will be collapsed to a dual-trigger ReviewCandidate) + // - Enough pure review-heavy candidates to fill the cap without the floor + const dualSha = 'dualfloor1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const ciFixForDual = makeCiFixCandidate({ + mergeSha: dualSha, + failingCheckName: 'CI / test', + diffExcerpt: '--- a/foo.ts\n+++ b/foo.ts\n@@ -1 +1 @@\n-bad\n+good', + }) + const reviewForDual = makeCandidate({mergeSha: dualSha, reviewRounds: 2}) + + // 8 pure review-heavy candidates that would fill the cap without the floor + const pureReview = Array.from({length: 8}, (_, i) => + makeCandidate({mergeSha: `purereview${i}${'0'.repeat(30 - String(i).length)}`}), + ) + + // ci-fix listed first, then review, then pure review (ci-fix sorts last in the list) + const input = makeDigestInput({ + mergedPrs: [...pureReview, ciFixForDual, reviewForDual], + maxLearnings: 5, + }) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly 5 candidates are emitted (cap) + expect(result.candidates).toHaveLength(5) + + // #then the dual-trigger candidate IS in the output (floor reserved it) + const dualCandidate = result.candidates.find(c => c.mergeSha === dualSha) + expect(dualCandidate).toBeDefined() + expect(dualCandidate?.trigger).toBe('review-heavy') + if (dualCandidate?.trigger === 'review-heavy') { + // #then the ci-fix evidence is attached + expect(dualCandidate.ciFix).toBeDefined() + expect(dualCandidate.ciFix?.failingCheckName).toBe('CI / test') + } + }) +}) + +// --------------------------------------------------------------------------- +// Unit 1: edge cases — same-trigger duplicates and freshness order +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — dual-trigger edge cases', () => { + it('EDGE: two review-heavy records with same SHA → first-seen wins (existing behavior unchanged)', () => { + // #given two review-heavy candidates with the SAME SHA + const sha = 'tworev001aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const review1 = makeCandidate({mergeSha: sha, reviewRounds: 2}) + const review2 = makeCandidate({mergeSha: sha, reviewRounds: 5}) // different rounds + const input = makeDigestInput({mergedPrs: [review1, review2]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is emitted (first-seen wins for same-trigger) + expect(result.candidates).toHaveLength(1) + expect(result.candidates[0]?.trigger).toBe('review-heavy') + // #then no ciFix is attached (no ci-fix counterpart) + if (result.candidates[0]?.trigger === 'review-heavy') { + expect(result.candidates[0].ciFix).toBeUndefined() + } + }) + + it('EDGE: two ci-fix records with same SHA → first-seen wins (existing behavior unchanged)', () => { + // #given two ci-fix candidates with the SAME SHA + const sha = 'twocifix1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' + const cifix1 = makeCiFixCandidate({mergeSha: sha, failingCheckName: 'CI / test'}) + const cifix2 = makeCiFixCandidate({mergeSha: sha, failingCheckName: 'CI / lint'}) + const input = makeDigestInput({mergedPrs: [cifix1, cifix2]}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is emitted (first-seen wins for same-trigger) + expect(result.candidates).toHaveLength(1) + expect(result.candidates[0]?.trigger).toBe('ci-fail-then-pass') + // #then the first ci-fix candidate's check name is used + if (result.candidates[0]?.trigger === 'ci-fail-then-pass') { + expect(result.candidates[0].failingCheckName).toBe('CI / test') + } + }) + + it('ORDER: dual-trigger candidate keeps its review candidate position in the output', () => { + // #given a mix of candidates where the dual-trigger review candidate is at position N + // The review candidate's position in the output should be preserved (freshness order). + const sha1 = 'order001aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const sha2 = 'order002aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' // dual-trigger SHA + const sha3 = 'order003aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + + const review1 = makeCandidate({mergeSha: sha1}) + const reviewForDual = makeCandidate({mergeSha: sha2, reviewRounds: 3}) + const ciFixForDual = makeCiFixCandidate({mergeSha: sha2}) + const review3 = makeCandidate({mergeSha: sha3}) + + // Input order: review1, reviewForDual, ciFixForDual, review3 + // The dual-trigger review candidate is at position 1 (0-indexed) + const input = makeDigestInput({ + mergedPrs: [review1, reviewForDual, ciFixForDual, review3], + maxLearnings: 10, + }) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then 3 candidates are emitted (review1, dual-trigger, review3) + expect(result.candidates).toHaveLength(3) + + // #then the dual-trigger candidate is at position 1 (its original review position) + expect(result.candidates[1]?.mergeSha).toBe(sha2) + expect(result.candidates[1]?.trigger).toBe('review-heavy') + if (result.candidates[1]?.trigger === 'review-heavy') { + expect(result.candidates[1].ciFix).toBeDefined() + } + + // #then the overall order is preserved: sha1, sha2 (dual), sha3 + expect(result.candidates.map(c => c.mergeSha)).toEqual([sha1, sha2, sha3]) }) }) diff --git a/scripts/capture-learnings-harvest.ts b/scripts/capture-learnings-harvest.ts index b49b9794f..72009f6e8 100644 --- a/scripts/capture-learnings-harvest.ts +++ b/scripts/capture-learnings-harvest.ts @@ -137,6 +137,28 @@ export interface CandidateSignals { */ export type Candidate = ReviewCandidate | CiFixCandidate +/** + * Attached ci-fix evidence on a ReviewCandidate that also matched the ci-fail-then-pass + * trigger. Reuses the same field names as CiFixCandidate's evidence fields for consistency. + * Present only when the same PR matched both triggers (within-run dedup attaches it). + * A non-empty `diffExcerpt` means the evidence is substantive; an empty diffExcerpt means + * the evidence was cleared (e.g. by the privacy scan in Unit 2). + */ +export interface CiFixEvidence { + /** Name of the check that transitioned failed → passed. */ + failingCheckName: string + /** + * Diff excerpt between lastFailingSha and firstPassingSha, truncated to + * MAX_EXCERPT_CHARS_PER_CANDIDATE. Empty string when evidence was cleared. + */ + diffExcerpt: string + /** + * Best-effort excerpt from the failing job log. + * '[failure log purged or unavailable]' when the log could not be fetched. + */ + logExcerpt?: string +} + /** * A candidate sourced from a PR with multiple Fro Bot review rounds. * @@ -145,6 +167,10 @@ export type Candidate = ReviewCandidate | CiFixCandidate * bounded to MAX_EXCERPT_CHARS_PER_CANDIDATE. Empty when enrichment was blocked by the * upstream privacy scan or when no prose was available. Array form gives the agent clearer * structure than a single concatenated string. + * + * `ciFix` is present when the same PR also matched the ci-fail-then-pass trigger. The + * within-run dedup attaches the ci-fix evidence to the surviving ReviewCandidate instead + * of dropping it, so one proposal carries both signals (R1, R2). */ export interface ReviewCandidate { trigger: 'review-heavy' @@ -164,6 +190,13 @@ export interface ReviewCandidate { * the enriched content or when no non-empty prose was available. */ reviewExcerpts: string[] + /** + * Attached ci-fix evidence when this PR also matched the ci-fail-then-pass trigger. + * Populated by within-run dedup when a same-SHA dual-trigger pair is collapsed. + * Absent for pure review-heavy PRs. A non-empty diffExcerpt means the evidence is + * substantive; hasCiFixEvidence() checks this boundary precisely. + */ + ciFix?: CiFixEvidence } /** @@ -305,8 +338,8 @@ export function parseMergeShaMarker(body: string): string | null { * Build the opaque candidate digest from injected inputs. * * Steps: - * 0. Within-run dedup: collapse to one candidate per mergeSha (a PR matching both triggers - * appears once per source); review-heavy takes precedence (R4). + * 0. Within-run dedup: collapse to one candidate per mergeSha. A PR matching both triggers + * keeps its review-heavy candidate with the ci-fix evidence attached (one proposal per SHA). * 1. Drop candidates whose mergeSha is already in openedLearningShas (seen-set dedup). * 2. Drop candidates whose signals strongly overlap an existing solutions doc (solutions dedup). * 3. Cap to maxLearnings. @@ -318,6 +351,26 @@ export function parseMergeShaMarker(body: string): string | null { * * No I/O. Fully unit-testable. The private token set is injected so the scan is pure. */ +/** + * Returns true when a candidate carries ci-fix evidence that should count toward the + * `selectWithCiFixFloor` reservation. + * + * A candidate "carries" ci-fix evidence when: + * - It is a pure CiFixCandidate (trigger === 'ci-fail-then-pass'), OR + * - It is a ReviewCandidate with a `ciFix` field whose `diffExcerpt` is non-empty. + * + * An empty-but-present `ciFix` (diffExcerpt === '') does NOT count — that means the + * evidence was cleared (e.g. by the privacy scan) and there is nothing substantive to + * reserve a slot for. + */ +export function hasCiFixEvidence(candidate: Candidate): boolean { + if (candidate.trigger === 'ci-fail-then-pass') return true + if (candidate.trigger === 'review-heavy' && candidate.ciFix !== undefined) { + return candidate.ciFix.diffExcerpt !== '' + } + return false +} + /** * Select up to `cap` candidates from `ordered` (freshness-ordered), guaranteeing that at least * `floor` ci-fail-then-pass candidates are included when that many exist. @@ -338,7 +391,7 @@ export function selectWithCiFixFloor(ordered: Candidate[], cap: number, floor: n if (cap <= 0) return [] if (floor <= 0) return ordered.slice(0, cap) - const ciFix = ordered.filter(c => c.trigger === 'ci-fail-then-pass') + const ciFix = ordered.filter(c => hasCiFixEvidence(c)) if (ciFix.length === 0) return ordered.slice(0, cap) // Reserve the freshest ci-fix candidates (bounded by floor and cap), then fill the rest of @@ -356,16 +409,54 @@ export function selectWithCiFixFloor(ordered: Candidate[], cap: number, floor: n export function buildCandidateDigest(input: BuildCandidateDigestInput): CandidateDigest { // Within-run dedup: a PR that matched more than one trigger (e.g. review-heavy AND // ci-fail-then-pass) appears once per source in mergedPrs. Collapse to one candidate per - // mergeSha so a single PR yields a single learning-proposal (R4). Precedence: review-heavy - // wins — review prose is the richer signal when a PR matched both. + // mergeSha so a single PR yields a single learning-proposal (R4). + // + // When a SHA has BOTH a review-heavy and a ci-fail-then-pass record, the ReviewCandidate + // SURVIVES with the CiFixCandidate's evidence ATTACHED to its `ciFix` field (R2). This + // preserves the richest signal (review prose) while keeping the ci-fix evidence so the + // floor can reserve a slot for it. The review candidate keeps its original position in + // the output (freshness order preserved). + // + // Same-trigger duplicates (two review or two ci-fix same SHA) keep existing behavior: + // first-seen wins (the Map only updates when review-heavy takes precedence over ci-fix). + // Unique SHAs are unchanged. const byMergeSha = new Map() + // Track ci-fix candidates separately so we can attach their evidence to review candidates. + const ciFixBySha = new Map() + for (const pr of input.mergedPrs) { + // Record the first ci-fix candidate per SHA for potential attachment to a review candidate. + if (pr.trigger === 'ci-fail-then-pass' && !ciFixBySha.has(pr.mergeSha)) { + ciFixBySha.set(pr.mergeSha, pr) + } + const existing = byMergeSha.get(pr.mergeSha) if (existing === undefined || (existing.trigger !== 'review-heavy' && pr.trigger === 'review-heavy')) { byMergeSha.set(pr.mergeSha, pr) } } - const deduped = [...byMergeSha.values()] + + // Attach ci-fix evidence to any review-heavy candidate that has a matching ci-fix record. + const deduped: Candidate[] = [] + for (const candidate of byMergeSha.values()) { + if (candidate.trigger === 'review-heavy') { + const ciFixRecord = ciFixBySha.get(candidate.mergeSha) + if (ciFixRecord === undefined) { + // Pure review-heavy candidate — no ci-fix counterpart + deduped.push(candidate) + } else { + // Attach the ci-fix evidence to the surviving review candidate (R2) + const ciFix: CiFixEvidence = { + failingCheckName: ciFixRecord.failingCheckName, + diffExcerpt: ciFixRecord.diffExcerpt, + ...(ciFixRecord.logExcerpt === undefined ? {} : {logExcerpt: ciFixRecord.logExcerpt}), + } + deduped.push({...candidate, ciFix}) + } + } else { + deduped.push(candidate) + } + } // drop already-proposed merge SHAs const afterSeenDedup = deduped.filter(pr => !input.openedLearningShas.has(pr.mergeSha)) From 49a356ff0631fea5fd7188b8542e65541c2f2e87 Mon Sep 17 00:00:00 2001 From: "Marcus R. Brown" Date: Tue, 23 Jun 2026 21:34:47 -0700 Subject: [PATCH 3/5] fix(capture): scan attached ci-fix evidence independently and fail-closed A dual-trigger candidate now carries both review prose and the fixing diff, so the privacy step scans each independently on the final candidate: a secret in the diff drops only the diff, a private name in the review prose drops only the prose, and a private name in both is caught in both. A scan-unavailable run clears the attached evidence too, so no unscanned evidence reaches the agent. --- scripts/capture-learnings-harvest.test.ts | 350 ++++++++++++++++++++++ scripts/capture-learnings-harvest.ts | 107 +++++-- 2 files changed, 434 insertions(+), 23 deletions(-) diff --git a/scripts/capture-learnings-harvest.test.ts b/scripts/capture-learnings-harvest.test.ts index 998842487..a251ab83f 100644 --- a/scripts/capture-learnings-harvest.test.ts +++ b/scripts/capture-learnings-harvest.test.ts @@ -3795,3 +3795,353 @@ describe('buildCandidateDigest — CiFix logExcerpt privacy scan', () => { expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) }) }) + +// --------------------------------------------------------------------------- +// Unit 2: Attached ciFix evidence scanned independently in ReviewCandidate +// --------------------------------------------------------------------------- + +// Helper: make a dual candidate (ReviewCandidate with ciFix attached) +function makeDualCandidate( + reviewExcerpts: string[], + ciFix: {failingCheckName: string; diffExcerpt: string; logExcerpt?: string}, + sha = 'dual0001aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', +): ReviewCandidate { + return { + ...makeCandidate({mergeSha: sha, reviewExcerpts}), + ciFix, + } +} + +describe('buildCandidateDigest — Unit 2: attached ciFix privacy scan (independent from reviewExcerpts)', () => { + it('HAPPY: dual candidate with clean review prose + clean diff → both survive, redacted', () => { + // #given a dual candidate with clean review prose and clean ci-fix diff + const candidate = makeDualCandidate(['Please fix the null check here.'], { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + logExcerpt: 'Error: test failed at line 42', + }) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts survive + expect(emitted.reviewExcerpts).toEqual(['Please fix the null check here.']) + // #then ciFix survives with its evidence + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.diffExcerpt).toBe('-bad\n+good') + expect(emitted.ciFix?.logExcerpt).toBe('Error: test failed at line 42') + // #then no counters incremented + expect(result.telemetry.enrichmentBlocked).toBe(0) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) + }) + + it('INDEPENDENT DROP (diff hit): secret in ciFix.diffExcerpt → ciFix cleared, reviewExcerpts SURVIVE', () => { + // #given a dual candidate with a secret in the attached diff but clean review prose + const secretDiff = '-old\n+token=ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA4' + const candidate = makeDualCandidate( + ['Please fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: secretDiff, + }, + 'dual0002aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts SURVIVE (clean — independent scan) + expect(emitted.reviewExcerpts).toEqual(['Please fix the null check here.']) + // #then ciFix is CLEARED (secret in diff) + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.diffExcerpt).toBe('') + // #then enrichmentBlockedBySecret is incremented (for the ciFix drop) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(1) + // #then the secret does NOT appear in the serialized digest + expect(JSON.stringify(result)).not.toContain('ghp_') + }) + + it('INDEPENDENT DROP (review hit): private name in reviewExcerpts → reviewExcerpts cleared, clean ciFix SURVIVES', () => { + // #given a dual candidate with a private name in review prose but clean ci-fix diff + const privateTokens = buildPrivateTokenSet(['testowner/secret-repo']) + const candidate = makeDualCandidate( + ['See testowner/secret-repo#42 for context.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + }, + 'dual0003aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts are CLEARED (private name hit) + expect(emitted.reviewExcerpts).toEqual([]) + // #then ciFix SURVIVES (clean — independent scan) + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.diffExcerpt).toBe('-bad\n+good') + // #then enrichmentBlocked is incremented (for the review drop) + expect(result.telemetry.enrichmentBlocked).toBe(1) + // #then enrichmentBlockedBySecret is NOT incremented + expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) + // #then private name does NOT appear in serialized digest + expect(JSON.stringify(result)).not.toContain('testowner/secret-repo') + }) + + it('CROSS-FIELD MUTATION PROOF: private name in BOTH reviewExcerpts AND ciFix.diffExcerpt → caught in BOTH', () => { + // #given a dual candidate with the SAME private name in BOTH review prose AND the attached diff + // This is the load-bearing mutation proof: if the attached-ciFix scan is removed, + // the private name in the diff reaches the serialized digest → this test FAILS. + const privateTokens = buildPrivateTokenSet(['testowner/secret-repo']) + const candidate = makeDualCandidate( + ['See testowner/secret-repo#42 for context.'], // private name in review prose + { + failingCheckName: 'CI / test', + diffExcerpt: '-old testowner/secret-repo config\n+new config', // private name in diff + }, + 'dual0004aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts are CLEARED (private name in review prose) + expect(emitted.reviewExcerpts).toEqual([]) + // #then ciFix is CLEARED (private name in diff — caught by the INDEPENDENT scan) + expect(emitted.ciFix).toBeDefined() + expect(emitted.ciFix?.diffExcerpt).toBe('') + // #then enrichmentBlocked is incremented TWICE (once for review, once for ciFix) + expect(result.telemetry.enrichmentBlocked).toBe(2) + // #then the private name does NOT appear ANYWHERE in the serialized digest + const serialized = JSON.stringify(result) + expect(serialized).not.toContain('testowner/secret-repo') + expect(serialized).not.toContain('testowner--secret-repo') + + // MUTATION PROOF: if we remove the ciFix scan (pass empty privateTokens for the ciFix scan), + // the diff's private name would reach the digest. We prove this by showing that a candidate + // with the private name ONLY in the diff (not in reviewExcerpts) IS caught by the ciFix scan. + const candidateOnlyDiffHit = makeDualCandidate( + ['Clean review prose — no private names here.'], // clean review + { + failingCheckName: 'CI / test', + diffExcerpt: '-old testowner/secret-repo config\n+new config', // private name ONLY in diff + }, + 'dual0004baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa', + ) + const resultOnlyDiff = buildCandidateDigest(makeDigestInput({mergedPrs: [candidateOnlyDiffHit], privateTokens})) + const emittedOnlyDiff = asReviewCandidate(resultOnlyDiff.candidates[0]) + // #then reviewExcerpts SURVIVE (clean) + expect(emittedOnlyDiff.reviewExcerpts).toEqual(['Clean review prose — no private names here.']) + // #then ciFix is CLEARED (private name in diff — the ciFix scan is load-bearing) + expect(emittedOnlyDiff.ciFix?.diffExcerpt).toBe('') + // #then the private name does NOT appear in the serialized digest + expect(JSON.stringify(resultOnlyDiff)).not.toContain('testowner/secret-repo') + // If the ciFix scan were removed, the diff's private name would appear here → test fails + }) + + it('SCAN-UNAVAILABLE: applyEnrichmentScanAvailability(false) clears reviewExcerpts AND attached ciFix', () => { + // #given a dual candidate with both review prose and ci-fix evidence + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + logExcerpt: 'Error: test failed', + }, + 'dual0005aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + + // #when scan is unavailable (token load failed) + const result = applyEnrichmentScanAvailability([candidate], false) + + // #then reviewExcerpts are cleared + const r0 = result[0] + if (r0?.trigger === 'review-heavy') { + expect(r0.reviewExcerpts).toEqual([]) + // #then ciFix evidence is ALSO cleared (no unscanned evidence reaches the digest) + expect(r0.ciFix).toBeDefined() + expect(r0.ciFix?.diffExcerpt).toBe('') + expect(r0.ciFix?.logExcerpt).toBeUndefined() + } + }) + + it('SCAN-UNAVAILABLE: applyEnrichmentScanAvailability(true) leaves dual candidate unchanged', () => { + // #given a dual candidate with both review prose and ci-fix evidence + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + logExcerpt: 'Error: test failed', + }, + 'dual0006aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + + // #when scan is available + const result = applyEnrichmentScanAvailability([candidate], true) + + // #then candidates are returned unchanged (same reference) + expect(result).toBe(result) // same array + const r0 = result[0] + if (r0?.trigger === 'review-heavy') { + expect(r0.reviewExcerpts).toEqual(['Fix the null check here.']) + expect(r0.ciFix?.diffExcerpt).toBe('-bad\n+good') + expect(r0.ciFix?.logExcerpt).toBe('Error: test failed') + } + }) + + it('ALLOWLIST/OPACITY: emitted dual candidate carries only allowlisted keys — no owner/repo/number/title', () => { + // #given a dual candidate + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + }, + 'dual0007aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the emitted candidate has only allowlisted keys + expect(result.candidates).toHaveLength(1) + const emitted = result.candidates[0] + expect(emitted).not.toHaveProperty('owner') + expect(emitted).not.toHaveProperty('repo') + expect(emitted).not.toHaveProperty('number') + expect(emitted).not.toHaveProperty('title') + // #then ciFix is present with only allowlisted sub-fields + if (emitted?.trigger === 'review-heavy' && emitted.ciFix !== undefined) { + const ciFixKeys = Object.keys(emitted.ciFix).sort() + // ciFix may have failingCheckName, diffExcerpt, and optionally logExcerpt + const allowedCiFixKeys = ['diffExcerpt', 'failingCheckName'].sort() + const allowedCiFixKeysWithLog = [...allowedCiFixKeys, 'logExcerpt'].sort() + expect([allowedCiFixKeys, allowedCiFixKeysWithLog]).toContainEqual(ciFixKeys) + } + }) + + it('REDACT (not block): /Users/ path in ciFix.diffExcerpt → redacted to [REDACTED], ciFix still emitted', () => { + // #given a dual candidate with a file path in the attached diff (redact-class, not block-class) + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: 'Config loaded from /Users/marcus/.ssh/config for the build.', + }, + 'dual0008aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted (not blocked) + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then ciFix is still present (not cleared) + expect(emitted.ciFix).toBeDefined() + // #then the path is redacted in the diff + expect(emitted.ciFix?.diffExcerpt).not.toContain('/Users/marcus') + expect(emitted.ciFix?.diffExcerpt).toContain('[REDACTED]') + // #then neither counter is incremented (redaction, not blocking) + expect(result.telemetry.enrichmentBlocked).toBe(0) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) + }) + + it('REDACT (not block): *.fro.bot hostname in ciFix.diffExcerpt → redacted, ciFix still emitted', () => { + // #given a dual candidate with an internal hostname in the attached diff + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: 'Connecting to api.fro.bot for the check.', + }, + 'dual0009aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted (not blocked) + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then ciFix is still present (not cleared) + expect(emitted.ciFix).toBeDefined() + // #then the hostname is redacted in the diff + expect(emitted.ciFix?.diffExcerpt).not.toContain('api.fro.bot') + expect(emitted.ciFix?.diffExcerpt).toContain('[REDACTED]') + // #then neither counter is incremented + expect(result.telemetry.enrichmentBlocked).toBe(0) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(0) + }) + + it('dual candidate with no ciFix → reviewExcerpts scan unchanged (pure review path unaffected)', () => { + // #given a pure review candidate (no ciFix) with clean prose + const candidate = makeCandidate({ + mergeSha: 'dual0010aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + reviewExcerpts: ['Fix the null check here.'], + }) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted with reviewExcerpts intact + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + expect(emitted.reviewExcerpts).toEqual(['Fix the null check here.']) + expect(emitted.ciFix).toBeUndefined() + expect(result.telemetry.enrichmentBlocked).toBe(0) + }) + + it('ciFix with secret in logExcerpt (not diffExcerpt) → ciFix cleared, reviewExcerpts survive', () => { + // #given a dual candidate with a secret in the attached logExcerpt but clean diff and review prose + const secretLog = 'Error: auth failed with token ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA5' + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-bad\n+good', + logExcerpt: secretLog, + }, + 'dual0011aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens: new Set()}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + // #then reviewExcerpts SURVIVE (clean — independent scan) + expect(emitted.reviewExcerpts).toEqual(['Fix the null check here.']) + // #then ciFix is CLEARED (secret in logExcerpt) + expect(emitted.ciFix?.diffExcerpt).toBe('') + expect(emitted.ciFix?.logExcerpt).toBeUndefined() + // #then enrichmentBlockedBySecret is incremented + expect(result.telemetry.enrichmentBlockedBySecret).toBe(1) + // #then the secret does NOT appear in the serialized digest + expect(JSON.stringify(result)).not.toContain('ghp_') + }) +}) diff --git a/scripts/capture-learnings-harvest.ts b/scripts/capture-learnings-harvest.ts index 72009f6e8..c2935029a 100644 --- a/scripts/capture-learnings-harvest.ts +++ b/scripts/capture-learnings-harvest.ts @@ -478,38 +478,90 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // For ReviewCandidate: // 1. Redact structural secrets (paths, hostnames, Bearer tokens) via redactLogDiffSecrets. // 2. If learningBodyHasPrivateLeak (private repo name) OR logDiffHasSecret (hard secret) - // still true after redaction → clear reviewExcerpts + increment enrichmentBlockedBySecret. + // still true after redaction → clear reviewExcerpts + increment the appropriate counter. // 3. Otherwise use the redacted excerpts. + // 4. If ciFix is attached, scan it INDEPENDENTLY (same redact-then-drop-on-residual pattern). + // A hit in ciFix drops only ciFix; a hit in reviewExcerpts drops only reviewExcerpts. + // Both fields are scanned every time — a private name in both is caught in both (R3). // - // For CiFixCandidate (Unit 3 placeholder): - // Evidence fields are not yet populated; no scan needed until Unit 3 adds them. + // For CiFixCandidate: + // Evidence fields are scanned and redacted (diffExcerpt + logExcerpt + failingCheckName). let enrichmentBlocked = 0 let enrichmentBlockedBySecret = 0 const candidates = capped.map(pr => { if (pr.trigger === 'review-heavy') { - if (pr.reviewExcerpts.length === 0) return pr - - // Step 1: redact structural secrets from the already-truncated excerpts - const redactedExcerpts = pr.reviewExcerpts.map(e => redactLogDiffSecrets(e)) - const redactedText = redactedExcerpts.join('\n') - - // Step 2: check for private-name leak or residual hard secret after redaction - const hasPrivateLeak = learningBodyHasPrivateLeak(redactedText, input.privateTokens) - const hasResidualSecret = logDiffHasSecret(redactedText) - - if (hasPrivateLeak) { - // Private-name hit: clear enriched content, keep candidate title-only - enrichmentBlocked++ - return {...pr, reviewExcerpts: []} + // --- reviewExcerpts scan (existing) --- + let resultPr: ReviewCandidate = pr + + if (pr.reviewExcerpts.length > 0) { + // Step 1: redact structural secrets from the already-truncated excerpts + const redactedExcerpts = pr.reviewExcerpts.map(e => redactLogDiffSecrets(e)) + const redactedText = redactedExcerpts.join('\n') + + // Step 2: check for private-name leak or residual hard secret after redaction + const hasPrivateLeak = learningBodyHasPrivateLeak(redactedText, input.privateTokens) + const hasResidualSecret = logDiffHasSecret(redactedText) + + if (hasPrivateLeak) { + // Private-name hit: clear enriched content, keep candidate title-only + enrichmentBlocked++ + resultPr = {...pr, reviewExcerpts: []} + } else if (hasResidualSecret) { + // Hard-secret residual after redaction: clear enriched content + enrichmentBlockedBySecret++ + resultPr = {...pr, reviewExcerpts: []} + } else { + // Step 3: use the redacted excerpts (structural secrets replaced with [REDACTED]) + resultPr = {...pr, reviewExcerpts: redactedExcerpts} + } } - if (hasResidualSecret) { - // Hard-secret residual after redaction: clear enriched content - enrichmentBlockedBySecret++ - return {...pr, reviewExcerpts: []} + + // --- attached ciFix scan (Unit 2, INDEPENDENT of reviewExcerpts scan) --- + // Scanned every time ciFix is present, regardless of the reviewExcerpts result. + // A hit in ciFix drops only ciFix; a hit in reviewExcerpts drops only reviewExcerpts. + if (resultPr.ciFix !== undefined) { + const rawDiff = resultPr.ciFix.diffExcerpt + const rawLog = resultPr.ciFix.logExcerpt ?? '' + const rawCheckName = resultPr.ciFix.failingCheckName + + // Step 1: redact structural secrets (including failingCheckName) + const redactedDiff = redactLogDiffSecrets(rawDiff) + const redactedLog = rawLog === '' ? rawLog : redactLogDiffSecrets(rawLog) + const redactedCheckName = redactLogDiffSecrets(rawCheckName) + const combinedText = `${redactedDiff}\n${redactedLog}\n${redactedCheckName}` + + // Step 2: check for private-name leak or residual hard secret + const ciFixHasPrivateLeak = learningBodyHasPrivateLeak(combinedText, input.privateTokens) + const ciFixHasResidualSecret = logDiffHasSecret(combinedText) + + if (ciFixHasPrivateLeak) { + // Private-name hit in ciFix: clear ciFix evidence, keep reviewExcerpts result + enrichmentBlocked++ + resultPr = { + ...resultPr, + ciFix: {failingCheckName: '[REDACTED]', diffExcerpt: ''}, + } + } else if (ciFixHasResidualSecret) { + // Hard-secret residual in ciFix: clear ciFix evidence + enrichmentBlockedBySecret++ + resultPr = { + ...resultPr, + ciFix: {failingCheckName: '[REDACTED]', diffExcerpt: ''}, + } + } else { + // Step 3: use the redacted values + const redactedCiFix: CiFixEvidence = { + failingCheckName: redactedCheckName, + diffExcerpt: redactedDiff, + } + if (redactedLog !== '') { + redactedCiFix.logExcerpt = redactedLog + } + resultPr = {...resultPr, ciFix: redactedCiFix} + } } - // Step 3: use the redacted excerpts (structural secrets replaced with [REDACTED]) - return {...pr, reviewExcerpts: redactedExcerpts} + return resultPr } // CiFixCandidate: scan and redact diffExcerpt + logExcerpt (R3/R3a). @@ -588,6 +640,15 @@ export function applyEnrichmentScanAvailability(candidates: Candidate[], scanAva if (scanAvailable) return candidates return candidates.map(c => { if (c.trigger === 'review-heavy') { + // Clear reviewExcerpts (existing) and also clear any attached ciFix evidence (Unit 2). + // No unscanned evidence of any type reaches the digest when the scan is unavailable. + if (c.ciFix !== undefined) { + return { + ...c, + reviewExcerpts: [] as string[], + ciFix: {failingCheckName: c.ciFix.failingCheckName, diffExcerpt: '', logExcerpt: undefined}, + } + } return {...c, reviewExcerpts: [] as string[]} } if (c.trigger === 'ci-fail-then-pass') { From 66e60385747037a3536e1e9e9abadae5a0238815 Mon Sep 17 00:00:00 2001 From: "Marcus R. Brown" Date: Tue, 23 Jun 2026 21:41:48 -0700 Subject: [PATCH 4/5] fix(capture): distill dual-trigger learnings from both signals When a candidate carries both review prose and attached ci-fix evidence, the agent now weaves the review feedback and the failure-to-fix into one learning rather than treating them separately. Report a dual-trigger count in the run summary so the merged candidates are visible. --- .github/workflows/capture-learnings.yaml | 10 ++++++++++ ...-001-fix-merged-candidate-evidence-plan.md | 6 +++--- scripts/capture-learnings-harvest.test.ts | 20 +++++++++++++++++++ scripts/capture-learnings-harvest.ts | 12 +++++++++++ 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/.github/workflows/capture-learnings.yaml b/.github/workflows/capture-learnings.yaml index 73930507a..0031be51b 100644 --- a/.github/workflows/capture-learnings.yaml +++ b/.github/workflows/capture-learnings.yaml @@ -111,6 +111,10 @@ jobs: about what was wrong and how it was fixed. It has been privacy-scanned; use it as given. May be empty for some candidates. - signals: { titleTokens: string[], labels: string[] } — secondary context only + - ciFix: object (optional) — present when this PR ALSO failed then fixed CI + before merge. When present it carries { failingCheckName, diffExcerpt, + logExcerpt? } with the same meaning as the ci-fail-then-pass fields below. It + has been privacy-scanned independently; use it as given. trigger: "ci-fail-then-pass" candidates have: - mergeSha: opaque merge commit SHA (the ONLY identifier you receive) @@ -133,6 +137,11 @@ jobs: prose, not in the title tokens. When reviewExcerpts is empty, fall back to signals but keep the body modest and clearly general rather than inventing specifics. + - When `ciFix` is ALSO present (the PR drew review AND failed then fixed CI), + distill from BOTH: what the reviewers flagged AND what the failing check caught + and how the fixing diff corrected it. These dual-signal PRs are the richest + source — weave the review feedback and the failure→fix into one learning, not + two stapled together. For trigger: "ci-fail-then-pass" — - Distills the learning as "this failure → this fix": what the failing check @@ -210,6 +219,7 @@ jobs: echo "| After seen dedup | $(jq '.telemetry.afterSeenDedup // 0' "$harvest") |" echo "| After solutions dedup | $(jq '.telemetry.afterSolutionsDedup // 0' "$harvest") |" echo "| Candidates emitted | $(jq '.telemetry.emitted // 0' "$harvest") |" + echo "| Dual-trigger (merged) candidates | $(jq '.telemetry.mergedCandidates // 0' "$harvest") |" echo "| Enrichment blocked by privacy | $(jq '.telemetry.enrichmentBlocked // 0' "$harvest") |" echo "| Enrichment blocked by secret | $(jq '.telemetry.enrichmentBlockedBySecret // 0' "$harvest") |" else diff --git a/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md b/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md index 6e38e4786..6eef16832 100644 --- a/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md +++ b/docs/plans/2026-06-23-001-fix-merged-candidate-evidence-plan.md @@ -140,7 +140,7 @@ agent prompt: review branch notes the optional attached failure→fix evidence ## Implementation Units -- [ ] **Unit 1: Attach ci-fix evidence in within-run dedup + `hasCiFixEvidence` + floor predicate** +- [x] **Unit 1: Attach ci-fix evidence in within-run dedup + `hasCiFixEvidence` + floor predicate** **Goal:** A same-SHA dual-trigger PR keeps its ci-fix evidence (attached to the review candidate), and the floor reserves slots for any candidate carrying ci-fix evidence. @@ -177,7 +177,7 @@ evidence fields. **Verification:** gates green; the dual-trigger PR retains ci-fix evidence and the floor reserves it. -- [ ] **Unit 2: Scan attached ci-fix evidence in the privacy step** +- [x] **Unit 2: Scan attached ci-fix evidence in the privacy step** **Goal:** A `ReviewCandidate` with attached `ciFix` has BOTH its review prose and its diff/log scanned independently, fail-closed, on the final candidate. @@ -214,7 +214,7 @@ drop-on-residual), the truncate-then-scan ordering invariant. **Verification:** gates green; no unscanned attached evidence reaches the digest; mutation proof bites. -- [ ] **Unit 3: Prompt + telemetry for dual-trigger candidates** +- [x] **Unit 3: Prompt + telemetry for dual-trigger candidates** **Goal:** The agent distills from both evidence sets when present; telemetry counts dual-trigger candidates and per-evidence-type blocks. diff --git a/scripts/capture-learnings-harvest.test.ts b/scripts/capture-learnings-harvest.test.ts index a251ab83f..2ef5db8cf 100644 --- a/scripts/capture-learnings-harvest.test.ts +++ b/scripts/capture-learnings-harvest.test.ts @@ -826,6 +826,24 @@ describe('buildCandidateDigest — dual-trigger attach (Unit 1 regression)', () } }) + it('telemetry: a dual-trigger candidate increments mergedCandidates; pure candidates do not', () => { + // #given one dual-trigger (review+ci-fix same SHA) and one pure-review candidate + const dualSha = 'mergedcc1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const input = makeDigestInput({ + mergedPrs: [ + makeCiFixCandidate({mergeSha: dualSha, failingCheckName: 'CI / test', diffExcerpt: '-bad\n+good'}), + makeCandidate({mergeSha: dualSha, reviewExcerpts: ['Fix it.']}), + makeCandidate({mergeSha: 'purerev1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1', reviewExcerpts: ['Other.']}), + ], + }) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then exactly one candidate is counted as dual-trigger (merged) + expect(result.telemetry.mergedCandidates).toBe(1) + }) + it('dual-trigger: review listed first, ci-fix listed second → same attach result', () => { // #given review-heavy listed BEFORE ci-fix (opposite order from the regression test) const sha = 'dualtrg2aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' @@ -1927,6 +1945,7 @@ describe('harvest→open schema contract', () => { afterSeenDedup: 3, afterSolutionsDedup: 2, emitted: 2, + mergedCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, }, @@ -3506,6 +3525,7 @@ describe('main() wiring: both candidate sources concatenated via buildCandidateD afterSeenDedup: 0, afterSolutionsDedup: 0, emitted: 0, + mergedCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, }, diff --git a/scripts/capture-learnings-harvest.ts b/scripts/capture-learnings-harvest.ts index c2935029a..daaa3f1ee 100644 --- a/scripts/capture-learnings-harvest.ts +++ b/scripts/capture-learnings-harvest.ts @@ -257,6 +257,11 @@ export interface DigestTelemetry { afterSeenDedup: number afterSolutionsDedup: number emitted: number + /** + * Number of dual-trigger candidates: a review-heavy candidate that also carried attached + * ci-fix evidence (the PR drew review AND failed then fixed CI). Counts-only. + */ + mergedCandidates: number /** * Number of candidates whose enriched content was dropped by the private-name scan. * The candidate itself is kept (title-only); only the enriched evidence is cleared. @@ -468,6 +473,11 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // review-heavy candidates can't starve the ci-fail-then-pass trigger. const capped = selectWithCiFixFloor(afterSolutionsDedup, input.maxLearnings, CIFIX_FLOOR) + // Count dual-trigger (merged) candidates: review-heavy candidates that carry attached ci-fix + // evidence (the PR drew review AND failed then fixed CI). Counted on the emitted set before the + // privacy scan, so a candidate whose evidence is later dropped still counts as dual-trigger. + const mergedCandidates = capped.filter(c => c.trigger === 'review-heavy' && c.ciFix !== undefined).length + // upstream privacy scan: scan each candidate's evidence text fail-closed. // // Privacy-ordering invariant: scan the already-truncated evidence text (truncation @@ -619,6 +629,7 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat afterSeenDedup: afterSeenDedup.length, afterSolutionsDedup: afterSolutionsDedup.length, emitted: candidates.length, + mergedCandidates, enrichmentBlocked, enrichmentBlockedBySecret, }, @@ -1851,6 +1862,7 @@ async function main(): Promise { afterSeenDedup: 0, afterSolutionsDedup: 0, emitted: 0, + mergedCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, }, From 4d2b25054574a55bdff5cfa4e81e5380bb061a4d Mon Sep 17 00:00:00 2001 From: "Marcus R. Brown" Date: Tue, 23 Jun 2026 22:19:32 -0700 Subject: [PATCH 5/5] fix(capture): share the ci-fix evidence scan and rename the dual-trigger counter Extract the redact-then-scan logic shared by the standalone and attached ci-fix evidence paths into one helper so the two scans cannot drift, rename the telemetry counter to dualTriggerCandidates so it does not read as 'merged into main', and correct the blocked-counter docs now that a dual-trigger candidate can drop each evidence field independently. --- .github/workflows/capture-learnings.yaml | 2 +- .gitignore | 1 + scripts/capture-learnings-harvest.test.ts | 114 ++++++++++++++++--- scripts/capture-learnings-harvest.ts | 129 ++++++++++++++-------- 4 files changed, 181 insertions(+), 65 deletions(-) diff --git a/.github/workflows/capture-learnings.yaml b/.github/workflows/capture-learnings.yaml index 0031be51b..09f4e4330 100644 --- a/.github/workflows/capture-learnings.yaml +++ b/.github/workflows/capture-learnings.yaml @@ -219,7 +219,7 @@ jobs: echo "| After seen dedup | $(jq '.telemetry.afterSeenDedup // 0' "$harvest") |" echo "| After solutions dedup | $(jq '.telemetry.afterSolutionsDedup // 0' "$harvest") |" echo "| Candidates emitted | $(jq '.telemetry.emitted // 0' "$harvest") |" - echo "| Dual-trigger (merged) candidates | $(jq '.telemetry.mergedCandidates // 0' "$harvest") |" + echo "| Dual-trigger candidates | $(jq '.telemetry.dualTriggerCandidates // 0' "$harvest") |" echo "| Enrichment blocked by privacy | $(jq '.telemetry.enrichmentBlocked // 0' "$harvest") |" echo "| Enrichment blocked by secret | $(jq '.telemetry.enrichmentBlockedBySecret // 0' "$harvest") |" else diff --git a/.gitignore b/.gitignore index 39e78a5f8..03c70864f 100644 --- a/.gitignore +++ b/.gitignore @@ -25,6 +25,7 @@ coverage # Local agent artifacts .context/ +.review/ # Wiki lint local artifacts (CI uploads them to actions/upload-artifact; never tracked in git) wiki-lint-report.md diff --git a/scripts/capture-learnings-harvest.test.ts b/scripts/capture-learnings-harvest.test.ts index 2ef5db8cf..1ae455912 100644 --- a/scripts/capture-learnings-harvest.test.ts +++ b/scripts/capture-learnings-harvest.test.ts @@ -622,7 +622,7 @@ describe('buildCandidateDigest — CiFixCandidate union support', () => { expect(result.telemetry.afterSeenDedup).toBe(0) }) - it('within-run dedup (R4): same SHA from both triggers yields one candidate, review-heavy wins WITH ci-fix attached', () => { + it('within-run dedup: same SHA from both triggers yields one candidate, review-heavy wins WITH ci-fix attached', () => { // #given a review-heavy and a ci-fix candidate with the SAME mergeSha, NOT yet proposed // (a PR that matched both triggers in this run) const sha = 'bothtrg1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' @@ -634,10 +634,10 @@ describe('buildCandidateDigest — CiFixCandidate union support', () => { // #when building the digest const result = buildCandidateDigest(input) - // #then exactly one candidate is emitted (R4), and it is the review-heavy one + // #then exactly one candidate is emitted, and it is the review-heavy one expect(result.candidates).toHaveLength(1) expect(result.candidates[0]?.trigger).toBe('review-heavy') - // #then the ci-fix evidence is ATTACHED to the surviving review candidate (R2) + // #then the ci-fix evidence is ATTACHED to the surviving review candidate // (not dropped — the dual-trigger PR is the richest learning source) if (result.candidates[0]?.trigger === 'review-heavy') { expect(result.candidates[0].ciFix).toBeDefined() @@ -723,7 +723,7 @@ describe('buildCandidateDigest — CHARACTERIZATION: single-trigger behavior unc }) // --------------------------------------------------------------------------- -// Unit 1: hasCiFixEvidence helper +// hasCiFixEvidence helper // --------------------------------------------------------------------------- describe('hasCiFixEvidence', () => { @@ -784,10 +784,10 @@ describe('hasCiFixEvidence', () => { }) // --------------------------------------------------------------------------- -// Unit 1: dual-trigger attach — the live-bug regression +// dual-trigger attach — the live-bug regression // --------------------------------------------------------------------------- -describe('buildCandidateDigest — dual-trigger attach (Unit 1 regression)', () => { +describe('buildCandidateDigest — dual-trigger attach regression', () => { it('REGRESSION: same-SHA review+ci-fix pair → ONE ReviewCandidate with ciFix attached', () => { // #given a review-heavy and a ci-fix candidate with the SAME mergeSha // This is the live bug: before this fix, the ci-fix evidence was dropped. @@ -826,7 +826,7 @@ describe('buildCandidateDigest — dual-trigger attach (Unit 1 regression)', () } }) - it('telemetry: a dual-trigger candidate increments mergedCandidates; pure candidates do not', () => { + it('telemetry: a dual-trigger candidate increments dualTriggerCandidates; pure candidates do not', () => { // #given one dual-trigger (review+ci-fix same SHA) and one pure-review candidate const dualSha = 'mergedcc1aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' const input = makeDigestInput({ @@ -840,8 +840,8 @@ describe('buildCandidateDigest — dual-trigger attach (Unit 1 regression)', () // #when building the digest const result = buildCandidateDigest(input) - // #then exactly one candidate is counted as dual-trigger (merged) - expect(result.telemetry.mergedCandidates).toBe(1) + // #then exactly one candidate is counted as dual-trigger + expect(result.telemetry.dualTriggerCandidates).toBe(1) }) it('dual-trigger: review listed first, ci-fix listed second → same attach result', () => { @@ -906,7 +906,7 @@ describe('buildCandidateDigest — dual-trigger attach (Unit 1 regression)', () }) // --------------------------------------------------------------------------- -// Unit 1: floor-fix — dual-trigger candidate is reserved by selectWithCiFixFloor +// floor-fix — dual-trigger candidate is reserved by selectWithCiFixFloor // --------------------------------------------------------------------------- describe('selectWithCiFixFloor — floor-fix: dual-trigger candidate is reserved', () => { @@ -1002,7 +1002,7 @@ describe('selectWithCiFixFloor — floor-fix: dual-trigger candidate is reserved }) // --------------------------------------------------------------------------- -// Unit 1: edge cases — same-trigger duplicates and freshness order +// dual-trigger edge cases — same-trigger duplicates and freshness order // --------------------------------------------------------------------------- describe('buildCandidateDigest — dual-trigger edge cases', () => { @@ -1945,7 +1945,7 @@ describe('harvest→open schema contract', () => { afterSeenDedup: 3, afterSolutionsDedup: 2, emitted: 2, - mergedCandidates: 0, + dualTriggerCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, }, @@ -3525,7 +3525,7 @@ describe('main() wiring: both candidate sources concatenated via buildCandidateD afterSeenDedup: 0, afterSolutionsDedup: 0, emitted: 0, - mergedCandidates: 0, + dualTriggerCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, }, @@ -3817,7 +3817,7 @@ describe('buildCandidateDigest — CiFix logExcerpt privacy scan', () => { }) // --------------------------------------------------------------------------- -// Unit 2: Attached ciFix evidence scanned independently in ReviewCandidate +// Attached ciFix evidence scanned independently in ReviewCandidate // --------------------------------------------------------------------------- // Helper: make a dual candidate (ReviewCandidate with ciFix attached) @@ -3832,7 +3832,7 @@ function makeDualCandidate( } } -describe('buildCandidateDigest — Unit 2: attached ciFix privacy scan (independent from reviewExcerpts)', () => { +describe('buildCandidateDigest — attached ciFix privacy scan (independent from reviewExcerpts)', () => { it('HAPPY: dual candidate with clean review prose + clean diff → both survive, redacted', () => { // #given a dual candidate with clean review prose and clean ci-fix diff const candidate = makeDualCandidate(['Please fix the null check here.'], { @@ -4165,3 +4165,87 @@ describe('buildCandidateDigest — Unit 2: attached ciFix privacy scan (independ expect(JSON.stringify(result)).not.toContain('ghp_') }) }) + +// --------------------------------------------------------------------------- +// counts-before-scan invariant — dualTriggerCandidates counted pre-scan +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — dualTriggerCandidates counted before privacy scan', () => { + it('dual-trigger candidate whose ciFix is cleared by privacy scan still increments dualTriggerCandidates', () => { + // #given a dual-trigger candidate whose attached ciFix contains a private name + // (so the privacy scan will CLEAR the ciFix evidence) + const privateTokens = buildPrivateTokenSet(['testowner/secret-repo']) + const sha = 'dual0012aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const candidate = makeDualCandidate( + ['Fix the null check here.'], + { + failingCheckName: 'CI / test', + diffExcerpt: '-old testowner/secret-repo config\n+new config', // private name → will be cleared + }, + sha, + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + + // #then ciFix is CLEARED (private name in diff) + expect(emitted.ciFix?.diffExcerpt).toBe('') + + // #then dualTriggerCandidates is STILL 1 — counted on the pre-scan capped set, + // not on the post-scan output. The candidate had ciFix attached before the scan. + expect(result.telemetry.dualTriggerCandidates).toBe(1) + }) +}) + +// --------------------------------------------------------------------------- +// mixed threat types — private NAME in reviewExcerpts, ghp_ SECRET in ciFix +// --------------------------------------------------------------------------- + +describe('buildCandidateDigest — mixed threat types in dual-trigger candidate', () => { + it('private NAME in reviewExcerpts AND ghp_ SECRET in ciFix.diffExcerpt → both counters increment independently', () => { + // #given a dual-trigger candidate where: + // - reviewExcerpts contains a private repo name (enrichmentBlocked counter) + // - ciFix.diffExcerpt contains a GitHub PAT (enrichmentBlockedBySecret counter) + // This proves both counter classes increment correctly when each field has a different threat. + const privateTokens = buildPrivateTokenSet(['testowner/secret-repo']) + const sha = 'dual0013aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa1' + const candidate = makeDualCandidate( + ['See testowner/secret-repo#42 for context.'], // private NAME → enrichmentBlocked + { + failingCheckName: 'CI / test', + diffExcerpt: '-old\n+token=ghp_AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA6', // ghp_ SECRET → enrichmentBlockedBySecret + }, + sha, + ) + const input = makeDigestInput({mergedPrs: [candidate], privateTokens}) + + // #when building the digest + const result = buildCandidateDigest(input) + + // #then the candidate is emitted + expect(result.candidates).toHaveLength(1) + const emitted = asReviewCandidate(result.candidates[0]) + + // #then reviewExcerpts are CLEARED (private name hit → enrichmentBlocked) + expect(emitted.reviewExcerpts).toEqual([]) + + // #then ciFix is CLEARED (ghp_ secret hit → enrichmentBlockedBySecret) + expect(emitted.ciFix?.diffExcerpt).toBe('') + + // #then enrichmentBlocked is 1 (private name in reviewExcerpts) + expect(result.telemetry.enrichmentBlocked).toBe(1) + + // #then enrichmentBlockedBySecret is 1 (ghp_ secret in ciFix.diffExcerpt) + expect(result.telemetry.enrichmentBlockedBySecret).toBe(1) + + // #then neither secret appears in the serialized digest + const serialized = JSON.stringify(result) + expect(serialized).not.toContain('testowner/secret-repo') + expect(serialized).not.toContain('ghp_') + }) +}) diff --git a/scripts/capture-learnings-harvest.ts b/scripts/capture-learnings-harvest.ts index daaa3f1ee..d49d99151 100644 --- a/scripts/capture-learnings-harvest.ts +++ b/scripts/capture-learnings-harvest.ts @@ -133,7 +133,7 @@ export interface CandidateSignals { * * Design: top-level discriminated fields per variant (not a nested `evidence` block). * This keeps the diff minimal, preserves the existing ReviewCandidate shape exactly - * (only adding `trigger`), and avoids over-engineering before Unit 3 adds CiFix evidence. + * (only adding `trigger`), and avoids over-engineering. */ export type Candidate = ReviewCandidate | CiFixCandidate @@ -142,7 +142,7 @@ export type Candidate = ReviewCandidate | CiFixCandidate * trigger. Reuses the same field names as CiFixCandidate's evidence fields for consistency. * Present only when the same PR matched both triggers (within-run dedup attaches it). * A non-empty `diffExcerpt` means the evidence is substantive; an empty diffExcerpt means - * the evidence was cleared (e.g. by the privacy scan in Unit 2). + * the evidence was cleared (e.g. by the privacy scan). */ export interface CiFixEvidence { /** Name of the check that transitioned failed → passed. */ @@ -170,7 +170,7 @@ export interface CiFixEvidence { * * `ciFix` is present when the same PR also matched the ci-fail-then-pass trigger. The * within-run dedup attaches the ci-fix evidence to the surviving ReviewCandidate instead - * of dropping it, so one proposal carries both signals (R1, R2). + * of dropping it, so one proposal carries both signals. */ export interface ReviewCandidate { trigger: 'review-heavy' @@ -261,17 +261,21 @@ export interface DigestTelemetry { * Number of dual-trigger candidates: a review-heavy candidate that also carried attached * ci-fix evidence (the PR drew review AND failed then fixed CI). Counts-only. */ - mergedCandidates: number + dualTriggerCandidates: number /** - * Number of candidates whose enriched content was dropped by the private-name scan. + * Number of evidence fields dropped by the private-name scan. * The candidate itself is kept (title-only); only the enriched evidence is cleared. + * A single dual-trigger candidate can contribute up to 2 (one per evidence type: + * reviewExcerpts and attached ciFix are scanned independently). * Counts-only — no private names logged. */ enrichmentBlocked: number /** - * Number of candidates whose enriched content was dropped because it contained a - * hard-secret shape (PAT, private key, credential-bearing connection string, etc.) - * detected by `logDiffHasSecret` after `redactLogDiffSecrets` was applied. + * Number of evidence fields dropped because they contained a hard-secret shape + * (PAT, private key, credential-bearing connection string, etc.) detected by + * `logDiffHasSecret` after `redactLogDiffSecrets` was applied. + * A single dual-trigger candidate can contribute up to 2 (one per evidence type: + * reviewExcerpts and attached ciFix are scanned independently). * Counts-only — no secret values logged. */ enrichmentBlockedBySecret: number @@ -411,13 +415,53 @@ export function selectWithCiFixFloor(ordered: Candidate[], cap: number, floor: n return ordered.filter(c => chosen.has(c)).slice(0, cap) } +/** + * Shared pure helper: redact and scan ci-fix evidence fields. + * + * Redacts structural secrets from diffExcerpt, logExcerpt, and failingCheckName via + * redactLogDiffSecrets, then checks the combined text for private-name leaks and + * residual hard secrets. Returns the redacted values and boolean scan results so + * callers can apply their own clear-or-emit logic. + * + * Privacy-ordering invariant: inputs must already be truncated to budget before this + * call. Never move truncation after the scan. + * + * Pure function: no I/O, no side effects, no counter mutations. + */ +function scanCiFixEvidence( + diffExcerpt: string, + logExcerpt: string | undefined, + failingCheckName: string, + privateTokens: Set, +): { + redactedDiff: string + redactedLog: string + redactedCheckName: string + hasPrivateLeak: boolean + hasResidualSecret: boolean +} { + const rawLog = logExcerpt ?? '' + + // Step 1: redact structural secrets (paths, hostnames, Bearer tokens) + const redactedDiff = redactLogDiffSecrets(diffExcerpt) + const redactedLog = rawLog === '' ? rawLog : redactLogDiffSecrets(rawLog) + const redactedCheckName = redactLogDiffSecrets(failingCheckName) + const combinedText = `${redactedDiff}\n${redactedLog}\n${redactedCheckName}` + + // Step 2: check for private-name leak or residual hard secret after redaction + const hasPrivateLeak = learningBodyHasPrivateLeak(combinedText, privateTokens) + const hasResidualSecret = logDiffHasSecret(combinedText) + + return {redactedDiff, redactedLog, redactedCheckName, hasPrivateLeak, hasResidualSecret} +} + export function buildCandidateDigest(input: BuildCandidateDigestInput): CandidateDigest { // Within-run dedup: a PR that matched more than one trigger (e.g. review-heavy AND // ci-fail-then-pass) appears once per source in mergedPrs. Collapse to one candidate per - // mergeSha so a single PR yields a single learning-proposal (R4). + // mergeSha so a single PR yields a single learning-proposal. // // When a SHA has BOTH a review-heavy and a ci-fail-then-pass record, the ReviewCandidate - // SURVIVES with the CiFixCandidate's evidence ATTACHED to its `ciFix` field (R2). This + // SURVIVES with the CiFixCandidate's evidence ATTACHED to its `ciFix` field. This // preserves the richest signal (review prose) while keeping the ci-fix evidence so the // floor can reserve a slot for it. The review candidate keeps its original position in // the output (freshness order preserved). @@ -450,7 +494,7 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // Pure review-heavy candidate — no ci-fix counterpart deduped.push(candidate) } else { - // Attach the ci-fix evidence to the surviving review candidate (R2) + // Attach the ci-fix evidence to the surviving review candidate const ciFix: CiFixEvidence = { failingCheckName: ciFixRecord.failingCheckName, diffExcerpt: ciFixRecord.diffExcerpt, @@ -473,10 +517,10 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // review-heavy candidates can't starve the ci-fail-then-pass trigger. const capped = selectWithCiFixFloor(afterSolutionsDedup, input.maxLearnings, CIFIX_FLOOR) - // Count dual-trigger (merged) candidates: review-heavy candidates that carry attached ci-fix - // evidence (the PR drew review AND failed then fixed CI). Counted on the emitted set before the - // privacy scan, so a candidate whose evidence is later dropped still counts as dual-trigger. - const mergedCandidates = capped.filter(c => c.trigger === 'review-heavy' && c.ciFix !== undefined).length + // Count dual-trigger candidates: review-heavy candidates that carry attached ci-fix + // evidence (the PR drew review AND failed then fixed CI). Counted on the pre-scan capped set, + // so a candidate whose evidence is later dropped still counts as dual-trigger. + const dualTriggerCandidates = capped.filter(c => c.trigger === 'review-heavy' && c.ciFix !== undefined).length // upstream privacy scan: scan each candidate's evidence text fail-closed. // @@ -492,7 +536,7 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // 3. Otherwise use the redacted excerpts. // 4. If ciFix is attached, scan it INDEPENDENTLY (same redact-then-drop-on-residual pattern). // A hit in ciFix drops only ciFix; a hit in reviewExcerpts drops only reviewExcerpts. - // Both fields are scanned every time — a private name in both is caught in both (R3). + // Both fields are scanned every time — a private name in both is caught in both. // // For CiFixCandidate: // Evidence fields are scanned and redacted (diffExcerpt + logExcerpt + failingCheckName). @@ -526,32 +570,25 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat } } - // --- attached ciFix scan (Unit 2, INDEPENDENT of reviewExcerpts scan) --- + // --- attached ciFix scan (INDEPENDENT of reviewExcerpts scan) --- // Scanned every time ciFix is present, regardless of the reviewExcerpts result. // A hit in ciFix drops only ciFix; a hit in reviewExcerpts drops only reviewExcerpts. if (resultPr.ciFix !== undefined) { - const rawDiff = resultPr.ciFix.diffExcerpt - const rawLog = resultPr.ciFix.logExcerpt ?? '' - const rawCheckName = resultPr.ciFix.failingCheckName + const {redactedDiff, redactedLog, redactedCheckName, hasPrivateLeak, hasResidualSecret} = scanCiFixEvidence( + resultPr.ciFix.diffExcerpt, + resultPr.ciFix.logExcerpt, + resultPr.ciFix.failingCheckName, + input.privateTokens, + ) - // Step 1: redact structural secrets (including failingCheckName) - const redactedDiff = redactLogDiffSecrets(rawDiff) - const redactedLog = rawLog === '' ? rawLog : redactLogDiffSecrets(rawLog) - const redactedCheckName = redactLogDiffSecrets(rawCheckName) - const combinedText = `${redactedDiff}\n${redactedLog}\n${redactedCheckName}` - - // Step 2: check for private-name leak or residual hard secret - const ciFixHasPrivateLeak = learningBodyHasPrivateLeak(combinedText, input.privateTokens) - const ciFixHasResidualSecret = logDiffHasSecret(combinedText) - - if (ciFixHasPrivateLeak) { + if (hasPrivateLeak) { // Private-name hit in ciFix: clear ciFix evidence, keep reviewExcerpts result enrichmentBlocked++ resultPr = { ...resultPr, ciFix: {failingCheckName: '[REDACTED]', diffExcerpt: ''}, } - } else if (ciFixHasResidualSecret) { + } else if (hasResidualSecret) { // Hard-secret residual in ciFix: clear ciFix evidence enrichmentBlockedBySecret++ resultPr = { @@ -574,7 +611,7 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat return resultPr } - // CiFixCandidate: scan and redact diffExcerpt + logExcerpt (R3/R3a). + // CiFixCandidate: scan and redact diffExcerpt + logExcerpt. // // Privacy-ordering invariant: diffExcerpt and logExcerpt are already truncated // to budget in the I/O shell before this pure core is called. Never move @@ -587,18 +624,12 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // increment the appropriate counter. // 3. Otherwise use the redacted values. if (pr.trigger === 'ci-fail-then-pass') { - const rawDiff = pr.diffExcerpt - const rawLog = pr.logExcerpt ?? '' - - // Step 1: redact structural secrets (including failingCheckName) - const redactedDiff = redactLogDiffSecrets(rawDiff) - const redactedLog = rawLog === '' ? rawLog : redactLogDiffSecrets(rawLog) - const redactedCheckName = redactLogDiffSecrets(pr.failingCheckName) - const combinedText = `${redactedDiff}\n${redactedLog}\n${redactedCheckName}` - - // Step 2: check for private-name leak or residual hard secret - const hasPrivateLeak = learningBodyHasPrivateLeak(combinedText, input.privateTokens) - const hasResidualSecret = logDiffHasSecret(combinedText) + const {redactedDiff, redactedLog, redactedCheckName, hasPrivateLeak, hasResidualSecret} = scanCiFixEvidence( + pr.diffExcerpt, + pr.logExcerpt, + pr.failingCheckName, + input.privateTokens, + ) if (hasPrivateLeak) { enrichmentBlocked++ @@ -629,7 +660,7 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat afterSeenDedup: afterSeenDedup.length, afterSolutionsDedup: afterSolutionsDedup.length, emitted: candidates.length, - mergedCandidates, + dualTriggerCandidates, enrichmentBlocked, enrichmentBlockedBySecret, }, @@ -651,7 +682,7 @@ export function applyEnrichmentScanAvailability(candidates: Candidate[], scanAva if (scanAvailable) return candidates return candidates.map(c => { if (c.trigger === 'review-heavy') { - // Clear reviewExcerpts (existing) and also clear any attached ciFix evidence (Unit 2). + // Clear reviewExcerpts (existing) and also clear any attached ciFix evidence. // No unscanned evidence of any type reaches the digest when the scan is unavailable. if (c.ciFix !== undefined) { return { @@ -1831,7 +1862,7 @@ async function main(): Promise { // If the scan is unavailable, clear all enriched evidence before passing to the pure core. // This ensures no unscanned prose reaches the digest under any path. // applyEnrichmentScanAvailability handles both ReviewCandidate (clears reviewExcerpts) - // and CiFixCandidate (clears diffExcerpt + logExcerpt) — Unit 3 verified this. + // and CiFixCandidate (clears diffExcerpt + logExcerpt). const safeCandidates = applyEnrichmentScanAvailability(allCandidates, enrichmentScanAvailable) const digest = buildCandidateDigest({ @@ -1862,7 +1893,7 @@ async function main(): Promise { afterSeenDedup: 0, afterSolutionsDedup: 0, emitted: 0, - mergedCandidates: 0, + dualTriggerCandidates: 0, enrichmentBlocked: 0, enrichmentBlockedBySecret: 0, },