From e1715f3efc3d737202dd1d3f2abfecbff9f66b1a Mon Sep 17 00:00:00 2001 From: "Marcus R. Brown" Date: Tue, 23 Jun 2026 18:19:07 -0700 Subject: [PATCH 1/2] fix(capture): reserve a cap slot for failed-then-fixed candidates A backlog of review-heavy candidates sorted ahead of failed-then-fixed ones and filled the per-run cap, so the new trigger could detect candidates yet never author a proposal. Reserve at least one slot for failed-then-fixed candidates when any exist, filling the rest of the budget in freshness order, so the trigger delivers when it fires while the per-run total stays bounded. --- scripts/capture-learnings-harvest.test.ts | 71 +++++++++++++++++++++++ scripts/capture-learnings-harvest.ts | 46 ++++++++++++++- 2 files changed, 115 insertions(+), 2 deletions(-) diff --git a/scripts/capture-learnings-harvest.test.ts b/scripts/capture-learnings-harvest.test.ts index 94a2da5d1..109224346 100644 --- a/scripts/capture-learnings-harvest.test.ts +++ b/scripts/capture-learnings-harvest.test.ts @@ -24,6 +24,7 @@ import { LEARNING_PROPOSAL_LABEL, MAX_EXCERPT_CHARS_PER_CANDIDATE, parseMergeShaMarker, + selectWithCiFixFloor, type BuildCandidateDigestInput, type Candidate, type CandidateDigest, @@ -2143,6 +2144,76 @@ function makeSc(sha: string, context: string, state: string): CommitCheckEntry { return {type: 'StatusContext', sha, context, state} } +describe('selectWithCiFixFloor', () => { + const sha = (n: number): string => `${n}`.padStart(40, '0') + + it('reserves a ci-fix slot when review-heavy candidates would otherwise fill the cap', () => { + // #given the production starvation scenario: many review-heavy candidates sorted ahead + // of a single ci-fix candidate, with a cap smaller than the review-heavy count + const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const ciFix = makeCiFixCandidate({mergeSha: sha(99)}) + const ordered = [...reviewHeavy, ciFix] // ci-fix sorts LAST (loses a flat slice) + + // #when selecting with cap=5, floor=1 + const selected = selectWithCiFixFloor(ordered, 5, 1) + + // #then the ci-fix candidate is included despite sorting past the cap + expect(selected).toHaveLength(5) + expect(selected).toContain(ciFix) + // and the rest are the freshest review-heavy ones (4 of them) + expect(selected.filter(c => c.trigger === 'review-heavy')).toHaveLength(4) + }) + + it('preserves freshness order among the selected candidates', () => { + const ordered = [ + makeCandidate({mergeSha: sha(1)}), + makeCandidate({mergeSha: sha(2)}), + makeCiFixCandidate({mergeSha: sha(3)}), + makeCandidate({mergeSha: sha(4)}), + ] + const selected = selectWithCiFixFloor(ordered, 3, 1) + // original order preserved (not reordered to put the reserved ci-fix first) + expect(selected.map(c => c.mergeSha)).toEqual([sha(1), sha(2), sha(3)]) + }) + + it('is a plain head cap when no ci-fix candidate exists', () => { + const ordered = Array.from({length: 6}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const selected = selectWithCiFixFloor(ordered, 5, 1) + expect(selected).toHaveLength(5) + expect(selected.map(c => c.mergeSha)).toEqual([sha(0), sha(1), sha(2), sha(3), sha(4)]) + }) + + it('is a plain head cap when floor is 0', () => { + const ciFix = makeCiFixCandidate({mergeSha: sha(99)}) + const ordered = [...Array.from({length: 5}, (_, i) => makeCandidate({mergeSha: sha(i)})), ciFix] + const selected = selectWithCiFixFloor(ordered, 5, 0) + expect(selected).not.toContain(ciFix) // floor 0 → no reservation, ci-fix sorts out + }) + + it('reserves up to floor ci-fix candidates (the freshest) when several exist', () => { + const ciFixA = makeCiFixCandidate({mergeSha: sha(50)}) + const ciFixB = makeCiFixCandidate({mergeSha: sha(51)}) + const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)})) + const ordered = [...reviewHeavy, ciFixA, ciFixB] + // floor=2 → both ci-fix reserved + const selected = selectWithCiFixFloor(ordered, 5, 2) + expect(selected).toContain(ciFixA) + expect(selected).toContain(ciFixB) + expect(selected).toHaveLength(5) + }) + + it('floor never exceeds the cap', () => { + const ciFix = Array.from({length: 4}, (_, i) => makeCiFixCandidate({mergeSha: sha(50 + i)})) + const selected = selectWithCiFixFloor(ciFix, 2, 3) // floor 3 > cap 2 + expect(selected).toHaveLength(2) // capped at 2, not 3 + }) + + it('returns empty when cap is 0', () => { + const ordered = [makeCiFixCandidate({mergeSha: sha(1)})] + expect(selectWithCiFixFloor(ordered, 0, 1)).toEqual([]) + }) +}) + describe('buildLogExcerpt', () => { it('keeps the stack-trace context around an error line, not just the error line', () => { // #given a log where an error line is surrounded by stack-trace context diff --git a/scripts/capture-learnings-harvest.ts b/scripts/capture-learnings-harvest.ts index e342973e1..56a0b8c12 100644 --- a/scripts/capture-learnings-harvest.ts +++ b/scripts/capture-learnings-harvest.ts @@ -48,6 +48,16 @@ const MIN_CORRECTION_SIGNALS = 1 /** Maximum number of candidates to emit per run. */ const MAX_LEARNINGS_PER_RUN = 5 +/** + * Minimum cap slots reserved for ci-fail-then-pass candidates when any exist. + * + * Without a floor, a backlog of review-heavy candidates sorts ahead of ci-fix ones and + * saturates the cap, so a working ci-fix trigger can detect candidates yet never author a + * proposal. Reserving at least this many slots guarantees the trigger delivers when it fires, + * while keeping the per-run total bounded by MAX_LEARNINGS_PER_RUN. + */ +const CIFIX_FLOOR = 1 + /** * Maximum total characters of review-prose excerpts per candidate. * Ranked by correction signal so the correction sentence is never clipped first. @@ -308,6 +318,37 @@ export function parseMergeShaMarker(body: string): string | null { * * No I/O. Fully unit-testable. The private token set is injected so the scan is pure. */ +/** + * Select up to `cap` candidates from `ordered` (freshness-ordered), guaranteeing that at least + * `floor` ci-fail-then-pass candidates are included when that many exist. + * + * Without this, a long run of review-heavy candidates at the front of `ordered` fills the cap + * before any ci-fix candidate is reached. The floor reserves slots for the freshest ci-fix + * candidates, then fills the remaining slots from `ordered` (skipping the ci-fix candidates + * already taken), preserving the original relative order of everything that is emitted. + * + * Pure and order-stable: when no ci-fix candidate exists, or the floor is 0, this is a plain + * head-of-list cap. + */ +export function selectWithCiFixFloor(ordered: Candidate[], cap: number, floor: number): Candidate[] { + if (cap <= 0) return [] + if (floor <= 0) return ordered.slice(0, cap) + + const ciFix = ordered.filter(c => c.trigger === 'ci-fail-then-pass') + if (ciFix.length === 0) return ordered.slice(0, cap) + + // Reserve the freshest ci-fix candidates (bounded by floor and cap), then fill the rest of + // the budget from the freshness-ordered list, skipping anything already reserved. + const chosen = new Set(ciFix.slice(0, Math.min(floor, cap))) + for (const candidate of ordered) { + if (chosen.size >= cap) break + chosen.add(candidate) + } + + // Emit in original (freshness) order, capped. + return ordered.filter(c => chosen.has(c)).slice(0, cap) +} + export function buildCandidateDigest(input: BuildCandidateDigestInput): CandidateDigest { // Within-run dedup: a PR that matched more than one trigger (e.g. review-heavy AND // ci-fail-then-pass) appears once per source in mergedPrs. Collapse to one candidate per @@ -328,8 +369,9 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat // drop candidates whose signals overlap an existing solutions doc const afterSolutionsDedup = afterSeenDedup.filter(pr => !overlapsAnySolutionsDoc(pr.signals, input.solutionsDocs)) - // cap to maxLearnings - const capped = afterSolutionsDedup.slice(0, input.maxLearnings) + // cap to maxLearnings, reserving a floor of slots for ci-fix candidates so a backlog of + // review-heavy candidates can't starve the ci-fail-then-pass trigger. + const capped = selectWithCiFixFloor(afterSolutionsDedup, input.maxLearnings, CIFIX_FLOOR) // upstream privacy scan: scan each candidate's evidence text fail-closed. // From bc4a5f50b1b407d4a3784ce8be4c245ed40108f5 Mon Sep 17 00:00:00 2001 From: "Marcus R. Brown" Date: Tue, 23 Jun 2026 18:24:07 -0700 Subject: [PATCH 2/2] docs(capture): note the reference-uniqueness contract on the cap-floor selector --- scripts/capture-learnings-harvest.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/capture-learnings-harvest.ts b/scripts/capture-learnings-harvest.ts index 56a0b8c12..b49b9794f 100644 --- a/scripts/capture-learnings-harvest.ts +++ b/scripts/capture-learnings-harvest.ts @@ -329,6 +329,10 @@ export function parseMergeShaMarker(body: string): string | null { * * Pure and order-stable: when no ci-fix candidate exists, or the floor is 0, this is a plain * head-of-list cap. + * + * Expects `ordered` to contain reference-unique candidate objects (it does in production: the + * list flows from the `byMergeSha` dedup, so every element is a distinct reference). Selection + * keys on object identity, so feeding a list with duplicated references would skew the count. */ export function selectWithCiFixFloor(ordered: Candidate[], cap: number, floor: number): Candidate[] { if (cap <= 0) return []