diff --git a/docs/brutal-remediation-backlog.md b/docs/brutal-remediation-backlog.md index 88391ac..45aaf62 100644 --- a/docs/brutal-remediation-backlog.md +++ b/docs/brutal-remediation-backlog.md @@ -624,3 +624,576 @@ * opportunistic mass deletion * cleanup justified by impression rather than evidence + +--- + +## Verdict + +Your backlog gets Cherry to: + +> truthful advisory + canonical lifecycle + live-state UI + +It does **not yet** get Cherry to: + +> closed-loop financial controller with execution authority, durable trust, and market-grade reliability + +You need backlog items for **data ingestion, consent, execution, reversibility, auditability, liability, and controlled rollout**. + +--- + +# Add These Issues + +## 25. P0 — Define user consent, permissions, and action authority boundary + +* `Priority`: `P0` +* `Owner Domain`: `security` +* `Owner Role`: `security-owner` +* `Owner Scope`: user authorization, consent records, execution permissions, revocation semantics +* `Why now`: Cherry cannot execute financial actions unless it has an explicit, auditable authority model. +* `Dependencies`: issues `1` through `22` +* `PR Order`: `25` +* `Acceptance Criteria`: + + * every action class has an explicit permission requirement + * user consent is persisted with timestamp, scope, actor, and revocation state + * read-only, advisory, confirmation-required, and autonomous modes are distinct + * no execution-capable path can run under advisory-only consent + * docs define exactly what Cherry is allowed to do +* `Out of Scope`: + + * payment rail integration + * bank/provider onboarding + +--- + +## 26. P0 — Add immutable audit ledger for recommendations, decisions, confirmations, and mutations + +* `Priority`: `P0` +* `Owner Domain`: `ledger` +* `Owner Role`: `ledger-owner` +* `Owner Scope`: audit events, decision records, state transition trace, actor attribution +* `Why now`: A finance controller without an immutable audit trail is unserious. +* `Dependencies`: issues `15`, `16`, `20`, `25` +* `PR Order`: `26` +* `Acceptance Criteria`: + + * every recommendation has a persisted decision record + * every user confirmation is linked to the decision it approved + * every ledger mutation references the originating decision/session + * every execution attempt records request, response, status, error, and idempotency key + * audit records are append-only + * no mutation exists without provenance +* `Out of Scope`: + + * analytics dashboards + * external compliance export + +--- + +## 27. P0 — Establish idempotent financial action execution semantics + +* `Priority`: `P0` +* `Owner Domain`: `execution` +* `Owner Role`: `execution-owner` +* `Owner Scope`: idempotency keys, execution attempts, retries, duplicate prevention, terminal execution states +* `Why now`: Retrying money movement without hard idempotency is how toy systems become financial shrapnel. +* `Dependencies`: issues `25` and `26` +* `PR Order`: `27` +* `Acceptance Criteria`: + + * every executable financial action has a stable idempotency key + * retry behavior is explicitly defined + * duplicate execution is prevented by storage-level constraints + * execution states include at least: + + * `PENDING` + * `SUBMITTED` + * `SUCCEEDED` + * `FAILED` + * `CANCELED` + * `REVERSED` + * `UNKNOWN` + * unknown execution state is never treated as success + * docs define recovery behavior for partial failure +* `Out of Scope`: + + * real payment providers + * autonomous execution policy + +--- + +## 28. P1 — Build provider-agnostic financial account connection model + +* `Priority`: `P1` +* `Owner Domain`: `integrations` +* `Owner Role`: `integrations-owner` +* `Owner Scope`: connected accounts, provider tokens, account identity, account capabilities +* `Why now`: Cherry needs real state, but provider details must not leak into engine truth. +* `Dependencies`: issues `25` through `27` +* `PR Order`: `28` +* `Acceptance Criteria`: + + * connected accounts are represented behind a provider-agnostic model + * provider account IDs are mapped to stable internal account IDs + * account capabilities are explicit: + + * balance readable + * transactions readable + * payments executable + * transfers executable + * credit liability readable + * unavailable capabilities degrade explicitly + * engine state uses internal canonical identities, not provider-native IDs +* `Out of Scope`: + + * supporting many providers + * payment execution + +--- + +## 29. P1 — Add transaction ingestion, normalization, and reconciliation pipeline + +* `Priority`: `P1` +* `Owner Domain`: `ingest` +* `Owner Role`: `ingest-owner` +* `Owner Scope`: transactions, balances, pending/posted reconciliation, account snapshots +* `Why now`: Closed-loop finance requires durable observed state, not one-off request snapshots. +* `Dependencies`: issue `28` +* `PR Order`: `29` +* `Acceptance Criteria`: + + * transactions ingest into canonical normalized records + * pending and posted transactions are distinct + * duplicate provider transactions are deduplicated + * balance snapshots are timestamped + * stale balances are marked degraded + * reconciliation detects mismatch between projected and observed state + * reconciliation mismatch blocks autonomous execution until resolved +* `Out of Scope`: + + * merchant intelligence + * category ML + * forecasting + +--- + +## 30. P1 — Implement stable liability linkage for cards, debts, and repayment targets + +* `Priority`: `P1` +* `Owner Domain`: `simulation` +* `Owner Role`: `engine-owner` +* `Owner Scope`: credit cards, linked liabilities, repayment targets, account identity graph +* `Why now`: Earlier degradation tells the truth about missing liability truth. This issue actually fixes the missing truth. +* `Dependencies`: issues `28` and `29` +* `PR Order`: `30` +* `Acceptance Criteria`: + + * credit cards link to canonical liabilities through stable identity + * repayment targets are explicit + * liability balances are loaded from live state where available + * unlinked credit accounts remain degraded + * label/name matching is not used as a truth source + * runtime tests prove valid card actions survive hard-constraint filtering when linkage exists +* `Out of Scope`: + + * interest optimization + * balance transfer products + * synthetic linkage + +--- + +## 31. P1 — Add reversible execution and compensation semantics + +* `Priority`: `P1` +* `Owner Domain`: `execution` +* `Owner Role`: `execution-owner` +* `Owner Scope`: cancellation, reversal, compensating actions, failed execution recovery +* `Why now`: Finance systems need a plan for being wrong. +* `Dependencies`: issues `27` through `30` +* `PR Order`: `31` +* `Acceptance Criteria`: + + * each executable action declares whether it is reversible + * reversible actions define cancellation window + * irreversible actions require stricter confirmation policy + * failed partial execution creates a recovery task + * compensation actions are represented separately from original actions + * UI/API never implies reversibility where none exists +* `Out of Scope`: + + * legal dispute handling + * customer support tooling + +--- + +## 32. P1 — Add execution-safe policy modes + +* `Priority`: `P1` +* `Owner Domain`: `execution` +* `Owner Role`: `execution-owner` +* `Owner Scope`: advisory mode, confirmation mode, supervised automation, autonomous automation +* `Why now`: Cherry needs staged authority. Jumping from recommendations to autonomy is how systems die. +* `Dependencies`: issues `25` through `31` +* `PR Order`: `32` +* `Acceptance Criteria`: + + * modes are explicit: + + * `ADVISORY_ONLY` + * `CONFIRM_EACH_ACTION` + * `SUPERVISED_AUTOPILOT` + * `AUTONOMOUS_LIMITED` + * each mode has allowed action classes + * each mode has max dollar limits + * each mode has degradation behavior + * autonomous modes are disabled unless all required primitives are available + * mode escalation requires explicit user consent +* `Out of Scope`: + + * growth onboarding + * marketing surfaces + +--- + +## 33. P1 — Define safety envelopes and hard financial guardrails + +* `Priority`: `P1` +* `Owner Domain`: `simulation` +* `Owner Role`: `engine-owner` +* `Owner Scope`: minimum cash floor, overdraft prevention, debt-payment limits, execution blockers +* `Why now`: Optimization without hard guardrails is just elegant negligence. +* `Dependencies`: issues `30` through `32` +* `PR Order`: `33` +* `Acceptance Criteria`: + + * minimum liquidity floor is explicit + * overdraft-risk actions are blocked + * debt paydowns cannot consume protected cash + * execution is blocked on stale balances + * execution is blocked on unresolved reconciliation mismatch + * guardrails are enforced below the UI/API layer + * guardrail breaches are auditable +* `Out of Scope`: + + * personalized financial advice regulation strategy + * investment risk modeling + +--- + +## 34. P2 — Add billing-cycle, interest, due-date, and minimum-payment semantics + +* `Priority`: `P2` +* `Owner Domain`: `simulation` +* `Owner Role`: `engine-owner` +* `Owner Scope`: credit cycles, APR, due dates, statement balances, minimum payments +* `Why now`: Debt optimization is fake without credit-cycle mechanics. +* `Dependencies`: issues `30` and `33` +* `PR Order`: `34` +* `Acceptance Criteria`: + + * credit accounts include APR where available + * statement balance and current balance are distinct + * due dates are represented explicitly + * minimum payment obligations are modeled + * late-payment risk is a hard constraint or explicit penalty + * payoff recommendations distinguish interest savings from liquidity pressure +* `Out of Scope`: + + * balance transfers + * credit-score prediction + * loan refinancing + +--- + +## 35. P2 — Add recurring obligations and cashflow calendar + +* `Priority`: `P2` +* `Owner Domain`: `planning` +* `Owner Role`: `planning-owner` +* `Owner Scope`: income, rent, subscriptions, recurring bills, expected obligations +* `Why now`: Present-time decisions are incomplete if near-future mandatory cashflows are invisible. +* `Dependencies`: issues `29`, `33`, and optionally `34` +* `PR Order`: `35` +* `Acceptance Criteria`: + + * recurring income is represented explicitly + * recurring obligations are represented explicitly + * obligation confidence is tracked + * near-future obligations affect liquidity guardrails + * uncertain obligations are not treated as guaranteed truth + * docs distinguish observed recurring patterns from user-confirmed obligations +* `Out of Scope`: + + * full forecasting engine + * tax planning + * investment planning + +--- + +## 36. P2 — Add policy evaluation harness with counterfactual replay + +* `Priority`: `P2` +* `Owner Domain`: `simulation` +* `Owner Role`: `engine-owner` +* `Owner Scope`: replay, counterfactual comparison, historical decision evaluation +* `Why now`: You need evidence that Cherry improves outcomes, not just tests that it behaves as written. +* `Dependencies`: issues `26`, `29`, `33`, and `35` +* `PR Order`: `36` +* `Acceptance Criteria`: + + * historical account state can be replayed safely + * Cherry decisions can be compared against baseline policies + * baseline policies are documented + * metrics include: + + * avoided overdraft risk + * interest reduction + * preserved liquidity + * debt reduction + * failed/degraded decision rate + * replay never mutates live state +* `Out of Scope`: + + * public benchmark claims + * marketing statistics + +--- + +## 37. P2 — Add decision quality metrics and production observability + +* `Priority`: `P2` +* `Owner Domain`: `observability` +* `Owner Role`: `infra-owner` +* `Owner Scope`: metrics, traces, decision outcomes, degradation rates, execution outcomes +* `Why now`: Without observability, Cherry cannot distinguish correctness from luck. +* `Dependencies`: issues `26`, `27`, `32`, and `36` +* `PR Order`: `37` +* `Acceptance Criteria`: + + * degradation rate is measured + * recommendation acceptance rate is measured + * execution success/failure rate is measured + * stale-state blocks are measured + * reconciliation mismatch rate is measured + * decision outcome metrics are tied to audit ledger records + * sensitive financial values are redacted from logs +* `Out of Scope`: + + * growth analytics + * ad tracking + * behavioral manipulation + +--- + +## 38. P1 — Add privacy, retention, and data deletion policy enforcement + +* `Priority`: `P1` +* `Owner Domain`: `security` +* `Owner Role`: `security-owner` +* `Owner Scope`: financial data retention, deletion, export, redaction, logs +* `Why now`: Real financial data changes the threat model. The repo-cleanup issues are not enough. +* `Dependencies`: issues `28`, `29`, and `37` +* `PR Order`: `38` +* `Acceptance Criteria`: + + * financial data classes have retention rules + * user deletion deletes or anonymizes covered records + * logs cannot contain raw account numbers, tokens, or transaction payloads + * exported audit data redacts secrets + * deletion behavior is tested + * docs define what is retained and why +* `Out of Scope`: + + * formal compliance certification + * enterprise governance features + +--- + +## 39. P1 — Threat-model connected-account and execution surfaces + +* `Priority`: `P1` +* `Owner Domain`: `security` +* `Owner Role`: `security-owner` +* `Owner Scope`: provider tokens, execution APIs, account linking, webhook trust, replay protection +* `Why now`: Once Cherry connects to financial providers, normal app security is insufficient. +* `Dependencies`: issues `25`, `28`, `29`, `32`, and `38` +* `PR Order`: `39` +* `Acceptance Criteria`: + + * threat model document exists + * token storage boundary is defined + * webhook authenticity is verified + * replay attacks are blocked + * privilege escalation between users/accounts is tested + * execution endpoints require strongest authorization boundary + * high-risk actions are rate-limited +* `Out of Scope`: + + * external pentest + * SOC2 theater + +--- + +## 40. P2 — Build provider sandbox integration before live execution + +* `Priority`: `P2` +* `Owner Domain`: `integrations` +* `Owner Role`: `integrations-owner` +* `Owner Scope`: sandbox provider connection, sandbox balances, sandbox transactions, sandbox execution +* `Why now`: Real execution must be rehearsed somewhere that cannot hurt anyone. +* `Dependencies`: issues `27` through `39` +* `PR Order`: `40` +* `Acceptance Criteria`: + + * sandbox account connection works + * sandbox transaction ingestion works + * sandbox balance refresh works + * sandbox execution attempt works + * sandbox webhooks update execution state + * reconciliation works against sandbox-observed state + * no live provider credentials are required for tests +* `Out of Scope`: + + * live provider rollout + * production autonomy + +--- + +## 41. P1 — Add live execution behind confirmation-only gate + +* `Priority`: `P1` +* `Owner Domain`: `execution` +* `Owner Role`: `execution-owner` +* `Owner Scope`: live confirmed payments/transfers, confirmation UX/API, execution audit +* `Why now`: The first real execution milestone should require human confirmation. +* `Dependencies`: issue `40` +* `PR Order`: `41` +* `Acceptance Criteria`: + + * live execution is impossible without explicit confirmation + * confirmed action payload is shown before execution + * action payload cannot mutate after confirmation + * execution result is persisted + * unknown result is surfaced as unknown, not failed or succeeded + * audit ledger links recommendation → confirmation → execution → observed reconciliation +* `Out of Scope`: + + * autonomous execution + * multi-provider support + +--- + +## 42. P1 — Add autonomous limited execution with strict caps + +* `Priority`: `P1` +* `Owner Domain`: `execution` +* `Owner Role`: `execution-owner` +* `Owner Scope`: limited autopilot, action caps, kill switch, escalation policy +* `Why now`: Autonomy should only exist after confirmation-only execution has proven safe. +* `Dependencies`: issues `41`, `37`, and `39` +* `PR Order`: `42` +* `Acceptance Criteria`: + + * autonomous execution is disabled by default + * user must opt in explicitly + * dollar caps are enforced + * action-class caps are enforced + * stale/degraded/reconciled-unknown state blocks autonomy + * global kill switch exists + * user-level kill switch exists + * autonomous decisions are auditable +* `Out of Scope`: + + * broad autonomy + * investments + * loans/refinancing + +--- + +## 43. P2 — Define narrow product vertical and success metric + +* `Priority`: `P2` +* `Owner Domain`: `product` +* `Owner Role`: `product-owner` +* `Owner Scope`: initial market slice, primary user, primary decision class, success metric +* `Why now`: “Personal finance AI” is too broad. Broad systems die beautifully. +* `Dependencies`: issues `20` through `24` +* `PR Order`: `43` +* `Acceptance Criteria`: + + * one initial vertical is chosen + * one primary user problem is chosen + * one measurable success metric is chosen + * unsupported product claims are removed + * roadmap explicitly excludes unrelated finance domains +* `Recommended vertical`: + + * credit-card payoff and liquidity-safe purchase routing +* `Out of Scope`: + + * wealth management + * investing + * tax + * full budgeting suite + +--- + +## 44. P2 — Infer user preferences from transaction history + +* `Priority`: `P2` +* `Owner Domain`: `intelligence` +* `Owner Role`: `intelligence-owner` +* `Owner Scope`: transaction classification, merchant memory, preference inference, discretionary pattern detection +* `Why now`: Cherry needs to distinguish financially unsafe spending from spending that is safe but misaligned with the user’s actual habits and priorities. +* `Dependencies`: issues `29`, `35`, `38`, and `39` +* `PR Order`: `44` +* `Acceptance Criteria`: + + * recurring merchants are detected from transaction history + * discretionary categories are inferred from observed spending + * user preference weights are derived from behavior, not manually assumed + * inferred preferences are confidence-scored + * low-confidence preferences cannot drive hard rejection + * users can correct inferred preferences + * corrected preferences override model inference + * preference inference never overrides hard liquidity, debt, or safety constraints + * explanations distinguish: + + * unsafe + * safe but expensive + * safe but preference-misaligned + * safe and preference-aligned +* `Out of Scope`: + + * manipulative spending nudges + * advertising + * selling transaction data + * moral judgment about purchases + +--- + +# Current Backlog Gap Map + +| Required Capability | Covered Now? | Add | +| ---------------------------- | --------------------: | ---------- | +| truthful solver | mostly | 9–14 | +| canonical advisory lifecycle | yes | 15–22 | +| live-state UI | yes | 21–22 | +| account connection | no | 28 | +| transaction ingestion | no | 29 | +| stable liability truth | partial/degraded only | 30 | +| user consent | no | 25 | +| execution authority | no | 27, 41, 42 | +| auditability | weak | 26 | +| reversibility/recovery | no | 31 | +| safety guardrails | partial | 33 | +| credit-cycle realism | no | 34 | +| recurring cashflow | no | 35 | +| empirical quality proof | no | 36–37 | +| privacy/retention | weak | 38 | +| threat model | weak | 39 | +| sandbox rollout | no | 40 | +| product focus | no | 43 | + +--- + diff --git a/docs/config-snapshot.md b/docs/config-snapshot.md index 0241263..d5f2035 100644 --- a/docs/config-snapshot.md +++ b/docs/config-snapshot.md @@ -849,7 +849,7 @@ export default nextConfig; "typescript": "^5" }, "engines": { - "node": ">=24.15.0 <25" + "node": ">=24.14.1 <25" } }, "node_modules/@alloc/quick-lru": { @@ -9583,7 +9583,7 @@ export default nextConfig; "private": true, "type": "module", "engines": { - "node": ">=24.15.0 <25" + "node": ">=24.14.1 <25" }, "engineStrict": true, "packageManager": "npm@11.12.1", diff --git a/docs/engine-optimality/candidate-space.md b/docs/engine-optimality/candidate-space.md index 1f37076..476a4ea 100644 --- a/docs/engine-optimality/candidate-space.md +++ b/docs/engine-optimality/candidate-space.md @@ -1,10 +1,12 @@ Status: Active -Last updated: 2026-03-19 +Last updated: 2026-04-28 # Engine Optimality Candidate Space ## Current behavior +Cherry evaluates a bounded generated candidate set with deterministic heuristic ranking. It does not prove global optimality over all possible financial actions. + ### Candidate space R(B) (bounded) `R(B)` is the representable candidate set defined purely by the bounds axes for @@ -41,6 +43,7 @@ Each action type has explicit parameter axes bounded by `Bounds`: - The live solver is not a future scheduler. - Live-generated `PAY_DOWN_DEBT` and `USE_CARD_WITH_PAYDOWN` actions are immediate-only and single-step. - `USE_CARD_WITH_PAYDOWN` is ordered as: purchase authorization effect, then immediate paydown effect. +- `maxCandidates`, when provided to the live solver, caps returned and traced ranked candidates only after deterministic filtering and ranking; it does not prune generated candidates before evaluation. ### Completeness Lemma (Bounded) diff --git a/docs/engine-optimality/status.md b/docs/engine-optimality/status.md index 315bd94..8c0e3b1 100644 --- a/docs/engine-optimality/status.md +++ b/docs/engine-optimality/status.md @@ -1,10 +1,12 @@ Status: Active -Last updated: 2026-01-18 +Last updated: 2026-04-28 # Engine Optimality Status ## Current behavior +Cherry evaluates a bounded generated candidate set with deterministic heuristic ranking. It does not prove global optimality over all possible financial actions. + ### Proven (bounded) - Bounded exact optimality is proven for `(objective_v1, candidates_v1)` under @@ -18,6 +20,11 @@ Last updated: 2026-01-18 - Real-world preference correctness or reward accuracy. - Completeness outside the tested bounds **B**. +### Live solver surface cap + +- `maxCandidates`, when provided, caps the surfaced ranked candidates in returned decisions and trace output. +- `maxCandidates` does not cap the evaluated candidate set before scoring. + ### Trace schema - `docs/engine-optimality/trace.md` diff --git a/lib/engine/solver.ts b/lib/engine/solver.ts index 49a8cab..518a399 100644 --- a/lib/engine/solver.ts +++ b/lib/engine/solver.ts @@ -129,9 +129,8 @@ export async function solveDecision( options.candidateFilter != null ? candidateActions.filter((action) => options.candidateFilter?.(action) === true) : candidateActions; - // PR8.3 intentionally counts exclusions from the surface-filtered generated set before - // hard filtering and before score sorting. This is a temporary coupling to pre-PR9 - // truncation behavior; PR9 may revise evaluation-order semantics. + // Count exclusions from the full surface-filtered generated set before hard filtering + // and ranking. maxCandidates caps surfaced ranked output only, not evaluation. const exclusions = surfaceFilteredCandidates.reduce((acc, action) => { if (!actionRequiresResolvableCreditLiability(action)) { return acc; @@ -151,15 +150,10 @@ export async function solveDecision( !Number.isNaN(options.maxCandidates) ? options.maxCandidates : null; - const constrainedCandidates = - maxCandidates !== null && surfaceFilteredCandidates.length > maxCandidates - ? surfaceFilteredCandidates.slice(0, maxCandidates) - : surfaceFilteredCandidates; - const decisions: EngineDecision[] = []; const hardConstraints = getHardConstraints(state); - for (const action of constrainedCandidates) { + for (const action of surfaceFilteredCandidates) { const projections = simulateAction(state, ctx, action, { scheduledPaydownEvaluation }); const { score, reasons, components } = scoreDecision(state, ctx, action, projections, weights); const constraintTags = evaluateConstraintsForDecision(state, ctx, action, projections); @@ -184,6 +178,10 @@ export async function solveDecision( if (primary !== 0) return primary; return a.actionId.localeCompare(b.actionId); }); + const surfaced = + maxCandidates !== null && filtered.length > maxCandidates + ? filtered.slice(0, maxCandidates) + : filtered; const trace: EngineDecisionTrace = { engineVersion: ENGINE_VERSION, @@ -198,7 +196,7 @@ export async function solveDecision( merchantCategoryKey: ctx.merchantCategoryKey == null ? null : ctx.merchantCategoryKey, amountCents: ctx.amountCents == null ? null : ctx.amountCents, }, - candidates: filtered.map((d) => ({ + candidates: surfaced.map((d) => ({ action: d.action, score: d.score, constraintsBreached: d.constraintsBreached, @@ -229,7 +227,7 @@ export async function solveDecision( } const result: SolveDecisionResult = { - decisions: filtered, + decisions: surfaced, trace, exclusions, capabilities, diff --git a/lib/engine/version.ts b/lib/engine/version.ts index 5a8742f..b4dba5f 100644 --- a/lib/engine/version.ts +++ b/lib/engine/version.ts @@ -1,4 +1,4 @@ -export const engineBehaviorVersion = 'engine_behavior_v4' as const; +export const engineBehaviorVersion = 'engine_behavior_v5' as const; export const engineInputVersion = 'engine_input_v1' as const; export const engineCandidateSpaceVersion = 'engine_candidate_space_v1' as const; export const engineAccountingVersion = 'engine_accounting_v1' as const; diff --git a/package-lock.json b/package-lock.json index 4fe77fc..e9974dc 100644 --- a/package-lock.json +++ b/package-lock.json @@ -52,7 +52,7 @@ "typescript": "^5" }, "engines": { - "node": ">=24.15.0 <25" + "node": ">=24.14.1 <25" } }, "node_modules/@alloc/quick-lru": { diff --git a/package.json b/package.json index 52701b7..534825a 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,7 @@ "private": true, "type": "module", "engines": { - "node": ">=24.15.0 <25" + "node": ">=24.14.1 <25" }, "engineStrict": true, "packageManager": "npm@11.12.1", diff --git a/scripts/check-vercel-parity.mts b/scripts/check-vercel-parity.mts index 1076d8a..d255bd3 100644 --- a/scripts/check-vercel-parity.mts +++ b/scripts/check-vercel-parity.mts @@ -34,9 +34,10 @@ if (!isVercel) { if (!Number.isFinite(major) || !Number.isFinite(minor) || !Number.isFinite(patch)) { guardrailFail('Unable to parse Node version', [process.version]); } - const satisfiesNodeEngine = major === 24 && (minor > 15 || (minor === 15 && patch >= 0)); + const satisfiesNodeEngine = + major === 24 && (minor > 14 || (minor === 14 && patch >= 1)); if (!satisfiesNodeEngine) { - guardrailFail('Node version must satisfy engines.node >=24.15.0 <25', [process.version]); + guardrailFail('Node version must satisfy engines.node >=24.14.1 <25', [process.version]); } const tmpRoot = process.env['CHERRY_TMP_ROOT']; diff --git a/scripts/guardrails/engine-freeze.policy.json b/scripts/guardrails/engine-freeze.policy.json index 2078182..f09867f 100644 --- a/scripts/guardrails/engine-freeze.policy.json +++ b/scripts/guardrails/engine-freeze.policy.json @@ -6,7 +6,7 @@ ] }, "engineVersions": { - "behavior": "engine_behavior_v4", + "behavior": "engine_behavior_v5", "input": "engine_input_v1", "candidateSpace": "engine_candidate_space_v1", "accounting": "engine_accounting_v1" diff --git a/tests/engine-solver.test.js b/tests/engine-solver.test.js index d1310b1..13a622e 100644 --- a/tests/engine-solver.test.js +++ b/tests/engine-solver.test.js @@ -213,6 +213,74 @@ async function testSolveDecisionSorts() { assert.equal(bestCardDecision?.action.cardId, 'card-strong'); } +async function testMaxCandidatesCapsRankedOutputNotEvaluationOrder() { + const state = buildStubState({ + cards: [ + { + id: 'card-bad', + userId: 'user-1', + issuer: 'Issuer', + label: 'Bad Card', + network: 'VISA', + productSlug: null, + rewardRules: [ + { + id: 'rule-bad', + cardId: 'card-bad', + categoryKey: 'DINING', + rateType: 'CASHBACK', + rateValue: 0.01, + confidence: 1, + source: 'STATIC_CONFIG', + }, + ], + isCredit: false, + isActive: true, + isVirtual: false, + }, + { + id: 'card-good', + userId: 'user-1', + issuer: 'Issuer', + label: 'Good Card', + network: 'VISA', + productSlug: null, + rewardRules: [ + { + id: 'rule-good', + cardId: 'card-good', + categoryKey: 'DINING', + rateType: 'CASHBACK', + rateValue: 0.03, + confidence: 1, + source: 'STATIC_CONFIG', + }, + ], + isCredit: false, + isActive: true, + isVirtual: false, + }, + ], + }); + const ctx = buildStubContext({ amountCents: 1_000 }); + const bad = { type: 'USE_CARD', cardId: 'card-bad' }; + const good = { type: 'USE_CARD', cardId: 'card-good' }; + + async function topCardIdFor(candidateActionsOverride) { + const result = await solveDecision(state, ctx, { + candidateActionsOverride, + maxCandidates: 1, + }); + + assert.equal(result.decisions.length, 1); + assert.equal(result.trace.candidates.length, 1); + return result.decisions[0]?.action.cardId; + } + + assert.equal(await topCardIdFor([bad, good]), 'card-good'); + assert.equal(await topCardIdFor([good, bad]), 'card-good'); +} + async function testDeterministicOrderingForEqualScores() { const state = buildStubState({ debts: [ @@ -1253,6 +1321,7 @@ function testGetEngineCapabilitiesDefaultsToUnavailable() { async function run() { await testSolveDecisionSorts(); + await testMaxCandidatesCapsRankedOutputNotEvaluationOrder(); await testDeterministicOrderingForEqualScores(); await testSolveDecisionValidation(); await testSafeSolveDecisionSuccess(); diff --git a/tests/node/engine-solver.test.js b/tests/node/engine-solver.test.js index 30a80b9..b715ec0 100644 --- a/tests/node/engine-solver.test.js +++ b/tests/node/engine-solver.test.js @@ -213,6 +213,74 @@ async function testSolveDecisionSorts() { assert.equal(bestCardDecision?.action.cardId, 'card-strong'); } +async function testMaxCandidatesCapsRankedOutputNotEvaluationOrder() { + const state = buildStubState({ + cards: [ + { + id: 'card-bad', + userId: 'user-1', + issuer: 'Issuer', + label: 'Bad Card', + network: 'VISA', + productSlug: null, + rewardRules: [ + { + id: 'rule-bad', + cardId: 'card-bad', + categoryKey: 'DINING', + rateType: 'CASHBACK', + rateValue: 0.01, + confidence: 1, + source: 'STATIC_CONFIG', + }, + ], + isCredit: false, + isActive: true, + isVirtual: false, + }, + { + id: 'card-good', + userId: 'user-1', + issuer: 'Issuer', + label: 'Good Card', + network: 'VISA', + productSlug: null, + rewardRules: [ + { + id: 'rule-good', + cardId: 'card-good', + categoryKey: 'DINING', + rateType: 'CASHBACK', + rateValue: 0.03, + confidence: 1, + source: 'STATIC_CONFIG', + }, + ], + isCredit: false, + isActive: true, + isVirtual: false, + }, + ], + }); + const ctx = buildStubContext({ amountCents: 1_000 }); + const bad = { type: 'USE_CARD', cardId: 'card-bad' }; + const good = { type: 'USE_CARD', cardId: 'card-good' }; + + async function topCardIdFor(candidateActionsOverride) { + const result = await solveDecision(state, ctx, { + candidateActionsOverride, + maxCandidates: 1, + }); + + assert.equal(result.decisions.length, 1); + assert.equal(result.trace.candidates.length, 1); + return result.decisions[0]?.action.cardId; + } + + assert.equal(await topCardIdFor([bad, good]), 'card-good'); + assert.equal(await topCardIdFor([good, bad]), 'card-good'); +} + async function testDeterministicOrderingForEqualScores() { const state = buildStubState({ debts: [ @@ -1257,6 +1325,7 @@ function testGetEngineCapabilitiesDefaultsToUnavailable() { async function run() { await testSolveDecisionSorts(); + await testMaxCandidatesCapsRankedOutputNotEvaluationOrder(); await testDeterministicOrderingForEqualScores(); await testSolveDecisionValidation(); await testSafeSolveDecisionSuccess(); diff --git a/tests/replay/index/engine@engine_behavior_v5__engine_input_v1__engine_candidate_space_v1__engine_accounting_v1.json b/tests/replay/index/engine@engine_behavior_v5__engine_input_v1__engine_candidate_space_v1__engine_accounting_v1.json new file mode 100644 index 0000000..82d7987 --- /dev/null +++ b/tests/replay/index/engine@engine_behavior_v5__engine_input_v1__engine_candidate_space_v1__engine_accounting_v1.json @@ -0,0 +1,11 @@ +{ + "hashes": [ + "603b9ebe8034a477e2969d7da70002d0bc8cf86b91f4c4c183fe58d26556c6fa" + ], + "versions": { + "engineAccountingVersion": "engine_accounting_v1", + "engineBehaviorVersion": "engine_behavior_v5", + "engineCandidateSpaceVersion": "engine_candidate_space_v1", + "engineInputVersion": "engine_input_v1" + } +}