Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions scripts/capture-learnings-harvest.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import {
LEARNING_PROPOSAL_LABEL,
MAX_EXCERPT_CHARS_PER_CANDIDATE,
parseMergeShaMarker,
selectWithCiFixFloor,
type BuildCandidateDigestInput,
type Candidate,
type CandidateDigest,
Expand Down Expand Up @@ -2143,6 +2144,76 @@ function makeSc(sha: string, context: string, state: string): CommitCheckEntry {
return {type: 'StatusContext', sha, context, state}
}

describe('selectWithCiFixFloor', () => {
const sha = (n: number): string => `${n}`.padStart(40, '0')

it('reserves a ci-fix slot when review-heavy candidates would otherwise fill the cap', () => {
// #given the production starvation scenario: many review-heavy candidates sorted ahead
// of a single ci-fix candidate, with a cap smaller than the review-heavy count
const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)}))
const ciFix = makeCiFixCandidate({mergeSha: sha(99)})
const ordered = [...reviewHeavy, ciFix] // ci-fix sorts LAST (loses a flat slice)

// #when selecting with cap=5, floor=1
const selected = selectWithCiFixFloor(ordered, 5, 1)

// #then the ci-fix candidate is included despite sorting past the cap
expect(selected).toHaveLength(5)
expect(selected).toContain(ciFix)
// and the rest are the freshest review-heavy ones (4 of them)
expect(selected.filter(c => c.trigger === 'review-heavy')).toHaveLength(4)
})

it('preserves freshness order among the selected candidates', () => {
const ordered = [
makeCandidate({mergeSha: sha(1)}),
makeCandidate({mergeSha: sha(2)}),
makeCiFixCandidate({mergeSha: sha(3)}),
makeCandidate({mergeSha: sha(4)}),
]
const selected = selectWithCiFixFloor(ordered, 3, 1)
// original order preserved (not reordered to put the reserved ci-fix first)
expect(selected.map(c => c.mergeSha)).toEqual([sha(1), sha(2), sha(3)])
})

it('is a plain head cap when no ci-fix candidate exists', () => {
const ordered = Array.from({length: 6}, (_, i) => makeCandidate({mergeSha: sha(i)}))
const selected = selectWithCiFixFloor(ordered, 5, 1)
expect(selected).toHaveLength(5)
expect(selected.map(c => c.mergeSha)).toEqual([sha(0), sha(1), sha(2), sha(3), sha(4)])
})

it('is a plain head cap when floor is 0', () => {
const ciFix = makeCiFixCandidate({mergeSha: sha(99)})
const ordered = [...Array.from({length: 5}, (_, i) => makeCandidate({mergeSha: sha(i)})), ciFix]
const selected = selectWithCiFixFloor(ordered, 5, 0)
expect(selected).not.toContain(ciFix) // floor 0 → no reservation, ci-fix sorts out
})

it('reserves up to floor ci-fix candidates (the freshest) when several exist', () => {
const ciFixA = makeCiFixCandidate({mergeSha: sha(50)})
const ciFixB = makeCiFixCandidate({mergeSha: sha(51)})
const reviewHeavy = Array.from({length: 8}, (_, i) => makeCandidate({mergeSha: sha(i)}))
const ordered = [...reviewHeavy, ciFixA, ciFixB]
// floor=2 → both ci-fix reserved
const selected = selectWithCiFixFloor(ordered, 5, 2)
expect(selected).toContain(ciFixA)
expect(selected).toContain(ciFixB)
expect(selected).toHaveLength(5)
})

it('floor never exceeds the cap', () => {
const ciFix = Array.from({length: 4}, (_, i) => makeCiFixCandidate({mergeSha: sha(50 + i)}))
const selected = selectWithCiFixFloor(ciFix, 2, 3) // floor 3 > cap 2
expect(selected).toHaveLength(2) // capped at 2, not 3
})

it('returns empty when cap is 0', () => {
const ordered = [makeCiFixCandidate({mergeSha: sha(1)})]
expect(selectWithCiFixFloor(ordered, 0, 1)).toEqual([])
})
})

describe('buildLogExcerpt', () => {
it('keeps the stack-trace context around an error line, not just the error line', () => {
// #given a log where an error line is surrounded by stack-trace context
Expand Down
50 changes: 48 additions & 2 deletions scripts/capture-learnings-harvest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,16 @@ const MIN_CORRECTION_SIGNALS = 1
/** Maximum number of candidates to emit per run. */
const MAX_LEARNINGS_PER_RUN = 5

/**
* Minimum cap slots reserved for ci-fail-then-pass candidates when any exist.
*
* Without a floor, a backlog of review-heavy candidates sorts ahead of ci-fix ones and
* saturates the cap, so a working ci-fix trigger can detect candidates yet never author a
* proposal. Reserving at least this many slots guarantees the trigger delivers when it fires,
* while keeping the per-run total bounded by MAX_LEARNINGS_PER_RUN.
*/
const CIFIX_FLOOR = 1

/**
* Maximum total characters of review-prose excerpts per candidate.
* Ranked by correction signal so the correction sentence is never clipped first.
Expand Down Expand Up @@ -308,6 +318,41 @@ export function parseMergeShaMarker(body: string): string | null {
*
* No I/O. Fully unit-testable. The private token set is injected so the scan is pure.
*/
/**
* Select up to `cap` candidates from `ordered` (freshness-ordered), guaranteeing that at least
* `floor` ci-fail-then-pass candidates are included when that many exist.
*
* Without this, a long run of review-heavy candidates at the front of `ordered` fills the cap
* before any ci-fix candidate is reached. The floor reserves slots for the freshest ci-fix
* candidates, then fills the remaining slots from `ordered` (skipping the ci-fix candidates
* already taken), preserving the original relative order of everything that is emitted.
*
* Pure and order-stable: when no ci-fix candidate exists, or the floor is 0, this is a plain
* head-of-list cap.
*
* Expects `ordered` to contain reference-unique candidate objects (it does in production: the
* list flows from the `byMergeSha` dedup, so every element is a distinct reference). Selection
* keys on object identity, so feeding a list with duplicated references would skew the count.
*/
export function selectWithCiFixFloor(ordered: Candidate[], cap: number, floor: number): Candidate[] {
if (cap <= 0) return []
if (floor <= 0) return ordered.slice(0, cap)

const ciFix = ordered.filter(c => c.trigger === 'ci-fail-then-pass')
if (ciFix.length === 0) return ordered.slice(0, cap)

// Reserve the freshest ci-fix candidates (bounded by floor and cap), then fill the rest of
// the budget from the freshness-ordered list, skipping anything already reserved.
const chosen = new Set<Candidate>(ciFix.slice(0, Math.min(floor, cap)))
for (const candidate of ordered) {
if (chosen.size >= cap) break
chosen.add(candidate)
}

// Emit in original (freshness) order, capped.
return ordered.filter(c => chosen.has(c)).slice(0, cap)
}

export function buildCandidateDigest(input: BuildCandidateDigestInput): CandidateDigest {
// Within-run dedup: a PR that matched more than one trigger (e.g. review-heavy AND
// ci-fail-then-pass) appears once per source in mergedPrs. Collapse to one candidate per
Expand All @@ -328,8 +373,9 @@ export function buildCandidateDigest(input: BuildCandidateDigestInput): Candidat
// drop candidates whose signals overlap an existing solutions doc
const afterSolutionsDedup = afterSeenDedup.filter(pr => !overlapsAnySolutionsDoc(pr.signals, input.solutionsDocs))

// cap to maxLearnings
const capped = afterSolutionsDedup.slice(0, input.maxLearnings)
// cap to maxLearnings, reserving a floor of slots for ci-fix candidates so a backlog of
// review-heavy candidates can't starve the ci-fail-then-pass trigger.
const capped = selectWithCiFixFloor(afterSolutionsDedup, input.maxLearnings, CIFIX_FLOOR)

// upstream privacy scan: scan each candidate's evidence text fail-closed.
//
Expand Down
Loading