From f49980a07284ea178995b18cd6ef92e4fc651b6e Mon Sep 17 00:00:00 2001 From: nicolascukas Date: Tue, 26 May 2026 22:22:32 +0200 Subject: [PATCH 1/6] =?UTF-8?q?test:=20add=20failing=20differential=20fixt?= =?UTF-8?q?ures=20for=20array/string-method=20portability=20=E2=80=94=20go?= =?UTF-8?q?al=20oracle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The differential harness exposed a cliff the codegen-string tests masked: the most-common portable form (filter/map/find with x.field) emits ATTRIBUTE access, which raises AttributeError on dict elements — the real shape of inline-schema, fetch-JSON, and literal arrays. Plus whole method families (array includes/join/slice/indexOf/some/every/reduce; string includes/slice/substring/indexOf/padStart/padEnd/repeat) are unlowered, and split(sep,limit) silently diverges (JS first-N vs Python maxsplit). 30 RED fixtures in 3 gateable slices (arr-core / arr-method / str-method). Each slice is a goal task gated on `conformance.mjs --filter "<slice>:"`. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/conformance.mjs | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/scripts/conformance.mjs b/scripts/conformance.mjs index b5de1f90..a4581930 100644 --- a/scripts/conformance.mjs +++ b/scripts/conformance.mjs @@ -271,6 +271,59 @@ const FIXTURES = [ { name: 'toFixed on a bracket-access receiver (quote-safe)', expr: 'data["price"].toFixed(2)', path: '/api/n', bindings: { locals: { data: { price: 3.14159 } } }, expected: '3.14' }, { name: 'Number.isInteger true', expr: 'Number.isInteger(a)', path: '/api/n', bindings: { locals: { a: 5 } }, expected: true }, { name: 'Number.isInteger false', expr: 'Number.isInteger(a)', path: '/api/n', bindings: { locals: { a: 5.5 } }, expected: false }, + + // ────────────────────────────────────────────────────────────────────────── + // BACKFILL ORACLE (goal: conformance-backfill, 2026-05-26). 
These RED fixtures + // encode bugs the differential harness caught that codegen-string tests masked. + // Each slice (arr-core / arr-method / str-method) is a goal task; the task gate + // is `node scripts/conformance.mjs --filter "<slice>:"`. Element bindings are + // DICTS (json.loads) — the real shape of inline-schema/fetch/literal arrays — + // so attribute access `x.field` MUST lower to subscript `x["field"]`. + // ────────────────────────────────────────────────────────────────────────── + + // ── arr-core: filter/map/find on DICT elements + arrow-shape robustness ── + // BUG: lowerJsArrayMethods emits `[x.field for x in ...]` (attribute access), + // which raises AttributeError on dict elements. Must emit `x["field"]`. + { name: 'arr-core: filter on a dict field', expr: 'items.filter((x) => x.active)', path: '/api/a', bindings: { locals: { items: [{ active: true, n: 1 }, { active: false, n: 2 }] } }, expected: [{ active: true, n: 1 }] }, + { name: 'arr-core: map a dict field', expr: 'items.map((x) => x.n)', path: '/api/a', bindings: { locals: { items: [{ active: true, n: 1 }, { active: false, n: 2 }] } }, expected: [1, 2] }, + { name: 'arr-core: find on a dict field', expr: 'items.find((x) => x.n === 2)', path: '/api/a', bindings: { locals: { items: [{ active: true, n: 1 }, { active: false, n: 2 }] } }, expected: { active: false, n: 2 } }, + { name: 'arr-core: chained filter then map (dict fields)', expr: 'items.filter((x) => x.active).map((x) => x.n)', path: '/api/a', bindings: { locals: { items: [{ active: true, n: 1 }, { active: false, n: 2 }] } }, expected: [1] }, + { name: 'arr-core: map a nested dict field x.meta.tag', expr: 'items.map((x) => x.meta.tag)', path: '/api/a', bindings: { locals: { items: [{ meta: { tag: 'a' } }, { meta: { tag: 'b' } }] } }, expected: ['a', 'b'] }, // arrow-shape robustness: index param, element+index, bare (unparenthesized) + // param, and a 2-level-nested arrow body combined with member access. 
+ { name: 'arr-core: map with an index param (x, i) => i', expr: 'items.map((x, i) => i)', path: '/api/a', bindings: { locals: { items: [{ n: 1 }, { n: 2 }] } }, expected: [0, 1] }, + { name: 'arr-core: map element + index (dict field + i)', expr: 'items.map((x, i) => x.n + i)', path: '/api/a', bindings: { locals: { items: [{ n: 10 }, { n: 20 }] } }, expected: [10, 21] }, + { name: 'arr-core: filter with a bare (unparenthesized) param', expr: 'items.filter(x => x.active)', path: '/api/a', bindings: { locals: { items: [{ active: true, n: 1 }, { active: false, n: 2 }] } }, expected: [{ active: true, n: 1 }] }, + { name: 'arr-core: map with a 2-level nested body + member access', expr: 'items.map((x) => Math.max(x.n, 0))', path: '/api/a', bindings: { locals: { items: [{ n: -1 }, { n: 5 }] } }, expected: [0, 5] }, + + // ── arr-method: array methods NOT yet lowered → AttributeError on Python ── + // includes/indexOf/slice work for both str+array via `in`/slicing; indexOf on a + // MISSING element must yield -1 (JS), not raise (Python list.index raises). 
+ { name: 'arr-method: includes true', expr: 'nums.includes(2)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: true }, + { name: 'arr-method: includes false', expr: 'nums.includes(9)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: false }, + { name: 'arr-method: join coerces to strings', expr: 'nums.join(",")', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: '1,2,3' }, + { name: 'arr-method: slice(0, 2)', expr: 'nums.slice(0, 2)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: [1, 2] }, + { name: 'arr-method: slice(-2) negative start', expr: 'nums.slice(-2)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: [2, 3] }, + { name: 'arr-method: indexOf present', expr: 'nums.indexOf(2)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: 1 }, + { name: 'arr-method: indexOf missing is -1 (not raise)', expr: 'nums.indexOf(9)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: -1 }, + { name: 'arr-method: some (scalar predicate)', expr: 'nums.some((n) => n === 2)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: true }, + { name: 'arr-method: every (scalar predicate)', expr: 'nums.every((n) => n > 0)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: true }, + { name: 'arr-method: reduce sum with seed', expr: 'nums.reduce((a, b) => a + b, 0)', path: '/api/a', bindings: { locals: { nums: [1, 2, 3] } }, expected: 6 }, + + // ── str-method: string methods NOT yet lowered → AttributeError on Python ── + // split(sep, limit) is the SILENT trap: JS keeps the first `limit` parts; + // Python's maxsplit keeps ALL parts (limit splits) → wrong result, no error. 
+ { name: 'str-method: includes true', expr: 's.includes("ana")', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: true }, + { name: 'str-method: includes false', expr: 's.includes("zzz")', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: false }, + { name: 'str-method: slice(1, 3)', expr: 's.slice(1, 3)', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: 'an' }, + { name: 'str-method: slice(-3) negative start', expr: 's.slice(-3)', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: 'ana' }, + { name: 'str-method: substring(0, 2)', expr: 's.substring(0, 2)', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: 'ba' }, + { name: 'str-method: indexOf present', expr: 's.indexOf("n")', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: 2 }, + { name: 'str-method: indexOf missing is -1', expr: 's.indexOf("z")', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: -1 }, + { name: 'str-method: padStart', expr: 's.padStart(8, "0")', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: '00banana' }, + { name: 'str-method: padEnd', expr: 's.padEnd(8, "0")', path: '/api/s', bindings: { locals: { s: 'banana' } }, expected: 'banana00' }, + { name: 'str-method: repeat', expr: 's.repeat(2)', path: '/api/s', bindings: { locals: { s: 'ab' } }, expected: 'abab' }, + { name: 'str-method: split with a limit (JS first-N, not maxsplit)', expr: 's.split(",", 2)', path: '/api/s', bindings: { locals: { s: 'a,b,c' } }, expected: ['a', 'b'] }, ]; // ── Value → literal emitters ──────────────────────────────────────────────── From 13324b5c1cc429404c677601a6667cab8019ce7a Mon Sep 17 00:00:00 2001 From: nicolascukas Date: Wed, 27 May 2026 10:42:29 +0200 Subject: [PATCH 2/6] =?UTF-8?q?fix(python):=20array=20filter/map/find=20?= =?UTF-8?q?=E2=80=94=20balanced=20scanner=20+=20dict-element=20subscriptin?= =?UTF-8?q?g=20(arr-core)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Salvages and corrects the codex forge candidate for the conformance-backfill arr-core task. Replaces the fragile regex-iteration array-method lowering with a balanced, string-aware scanner (consistent with the JSON/Math/Number lowerers), fixing two defects in the candidate: - single-char fields (`x.n`) were not subscripted (regex required 2+ chars) - chained `.filter().map()` produced garbled syntax once filter emitted a quoted subscript, because the regex receiver char-class couldn't span `"` Member access on the bound element now lowers to dict-subscript form (`x.active` → `x["active"]`, `x.meta.tag` → `x["meta"]["tag"]`) so iterating a list of dicts works at runtime; chaining composes by nesting comprehensions; `map((x, i) => …)` lowers via enumerate(). Method calls (`x.toUpperCase()`) are left for the string-method pass. Updates 6 codegen-string tests that had frozen the prior attribute-access output the differential harness proves fails on dicts. arr-core conformance: 9/9. Full fastapi suite green. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/python/src/fastapi-response.ts | 173 ++++++++++++++++++++---- packages/python/tests/fastapi.test.ts | 33 +++-- 2 files changed, 171 insertions(+), 35 deletions(-) diff --git a/packages/python/src/fastapi-response.ts b/packages/python/src/fastapi-response.ts index a7a97029..d9064ffc 100644 --- a/packages/python/src/fastapi-response.ts +++ b/packages/python/src/fastapi-response.ts @@ -47,18 +47,6 @@ export function generateRespondFastAPI(respondNode: IRNode, indent: string): str return [`${indent}return Response(status_code=200)`]; } -// One level of nested parens inside the arrow body: matches `(u.age > 18)`, -// `Math.max(a, b)`, etc. Two-or-more levels still fall through (acceptable -// fallback per the lift-rate metric). 
-const ARROW_BODY = '((?:[^()]|\\([^()]*\\))+)'; -// Receiver allows brackets + spaces so chained calls work after the inner -// call has already been rewritten to a list-comprehension (which contains -// brackets). The outer iteration re-runs the regex on the rewritten form. -const ARROW_RECEIVER = '([\\w.\\[\\] ]+?)'; - -const FILTER_RE = new RegExp(`${ARROW_RECEIVER}\\.filter\\(\\((\\w+)\\)\\s*=>\\s*${ARROW_BODY}\\)`, 'g'); -const MAP_RE = new RegExp(`${ARROW_RECEIVER}\\.map\\(\\((\\w+)\\)\\s*=>\\s*${ARROW_BODY}\\)`, 'g'); -const FIND_RE = new RegExp(`${ARROW_RECEIVER}\\.find\\(\\((\\w+)\\)\\s*=>\\s*${ARROW_BODY}\\)`, 'g'); // Quoted strings absorbed by the alternation; only literal `===`/`!==` // outside strings get rewritten. Both single and double quotes AND // backtick template literals are covered so a message like @@ -75,24 +63,157 @@ const STRICT_EQ_RE = new RegExp(`${STRING_LITERAL_ALT}|===|!==`, 'g'); // commit 68565826. const JS_LITERAL_RE = new RegExp(`${STRING_LITERAL_ALT}|(? 0) { + if (text[k] === '(') { + out += text.slice(i, k); // method call — leave for the string pass + } else { + out += varName + fields.map((field) => `[${JSON.stringify(field)}]`).join(''); + } + i = k; + continue; + } + } + out += c; + i += 1; + } + return out; +} + +// Parse an arrow callback's argument text into `{ params, body }`, or null when +// it isn't a single arrow function (e.g. `.map(fn)` with a bare reference, which +// is left unchanged). Handles `(p) => body`, `p => body`, and `(p, i) => body`. 
+function parseArrowCallback(inner: string): { params: string[]; body: string } | null { + const trimmed = inner.trim(); + if (trimmed.startsWith('(')) { + const close = matchBalancedParen(trimmed, 0); + if (close === -1) return null; + const after = trimmed.slice(close + 1).trim(); + if (!after.startsWith('=>')) return null; + const params = splitTopLevelArgs(trimmed.slice(1, close)) + .map((s) => s.trim()) + .filter(Boolean); + return { params, body: after.slice(2).trim() }; + } + const m = trimmed.match(/^([A-Za-z_$][\w$]*)\s*=>\s*([\s\S]+)$/); + if (!m) return null; + return { params: [m[1]], body: m[2].trim() }; +} + +// Lower JS arrow-callback array methods to Python comprehensions: +// arr.filter((x) => pred) -> [x for x in arr if pred] +// arr.map((x) => body) -> [body for x in arr] +// arr.map((x, i) => body) -> [body for i, x in enumerate(arr)] +// arr.find((x) => pred) -> next((x for x in arr if pred), None) +// Balanced + string-aware scan (NOT regex): the receiver is taken from the +// already-emitted output via findReceiverStart, so chained calls compose +// naturally (`arr.filter(...).map(...)` nests one comprehension inside the +// next) and the quotes/brackets of a lowered comprehension can never desync the +// receiver — the failure mode of the prior regex form. Member access on the +// bound element is dict-subscripted so a list-of-dicts iterates correctly. +const ARROW_ARRAY_METHODS = new Set(['filter', 'map', 'find']); + function lowerJsArrayMethods(expr: string): string { - // Iterate so chained calls (`.filter(...).map(...)`) collapse fully. - // Each pass rewrites the innermost matchable call; the broadened - // receiver picks up the list-comprehension produced by the prior pass. - // Bounded at 8 iterations to prevent any accidental infinite-loop bug; - // realistic chains rarely exceed 3-4 calls. 
- let prev = ''; - let next = expr; + let out = ''; let i = 0; - while (prev !== next && i < 8) { - prev = next; - next = next - .replace(FILTER_RE, (_m, arr, varName, pred) => `[${varName} for ${varName} in ${arr} if ${pred}]`) - .replace(MAP_RE, (_m, arr, varName, body) => `[${body} for ${varName} in ${arr}]`) - .replace(FIND_RE, (_m, arr, varName, pred) => `next((${varName} for ${varName} in ${arr} if ${pred}), None)`); + let quote: string | null = null; + while (i < expr.length) { + const c = expr[i]; + if (quote) { + out += c; + if (c === '\\') { + out += expr[i + 1] ?? ''; + i += 2; + continue; + } + if (c === quote) quote = null; + i += 1; + continue; + } + if (c === '"' || c === "'" || c === '`') { + quote = c; + out += c; + i += 1; + continue; + } + const m = expr.slice(i).match(/^\.([A-Za-z]\w*)\(/); + if (m && ARROW_ARRAY_METHODS.has(m[1])) { + const method = m[1]; + const openIdx = i + m[0].length - 1; + const closeIdx = matchBalancedParen(expr, openIdx); + const recvStart = findReceiverStart(out); + if (closeIdx !== -1 && recvStart !== -1) { + const arrow = parseArrowCallback(expr.slice(openIdx + 1, closeIdx)); + if (arrow && arrow.params.length >= 1) { + const receiver = out.slice(recvStart); + const pre = out.slice(0, recvStart); + const elemVar = arrow.params[0]; + const idxVar = arrow.params[1]; + // Recurse for nested array methods in the body; subscript the element + // var's member access. The index var (if any) stays a bare int. 
+ const body = lowerJsArrayMethods(lowerDictMemberAccess(arrow.body, elemVar)); + let lowered: string; + if (method === 'filter') { + lowered = `[${elemVar} for ${elemVar} in ${receiver} if ${body}]`; + } else if (method === 'find') { + lowered = `next((${elemVar} for ${elemVar} in ${receiver} if ${body}), None)`; + } else if (idxVar) { + lowered = `[${body} for ${idxVar}, ${elemVar} in enumerate(${receiver})]`; + } else { + lowered = `[${body} for ${elemVar} in ${receiver}]`; + } + out = `${pre}${lowered}`; + i = closeIdx + 1; + continue; + } + } + } + out += c; i += 1; } - return next; + return out; } // Index of the bracket that closes the one at `openIdx`, tracking ()[]{} depth diff --git a/packages/python/tests/fastapi.test.ts b/packages/python/tests/fastapi.test.ts index 3bec392a..b56d38b7 100644 --- a/packages/python/tests/fastapi.test.ts +++ b/packages/python/tests/fastapi.test.ts @@ -2278,18 +2278,18 @@ describe('FastAPI Transpiler', () => { test('.filter((x) => pred) lowers to list comprehension', async () => { const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); - expect(rewriteFastAPIExpr('users.filter((u) => u.active)', [])).toBe('[u for u in users if u.active]'); + expect(rewriteFastAPIExpr('users.filter((u) => u.active)', [])).toBe('[u for u in users if u["active"]]'); }); test('.map((x) => expr) lowers to list comprehension', async () => { const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); - expect(rewriteFastAPIExpr('users.map((u) => u.name)', [])).toBe('[u.name for u in users]'); + expect(rewriteFastAPIExpr('users.map((u) => u.name)', [])).toBe('[u["name"] for u in users]'); }); test('.find((x) => pred) lowers to next() with None default', async () => { const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); expect(rewriteFastAPIExpr('users.find((u) => u.id == id)', [])).toBe( - 'next((u for u in users if u.id == id), None)', + 'next((u for u in users if u["id"] == id), None)', ); 
}); @@ -2298,14 +2298,29 @@ describe('FastAPI Transpiler', () => { // Arrow rewrite runs first; the inner `===` is then caught by the // strict-equality pass on the rewritten predicate. expect(rewriteFastAPIExpr('users.find((item) => item.id === id)', [])).toBe( - 'next((item for item in users if item.id == id), None)', + 'next((item for item in users if item["id"] == id), None)', ); }); - test('arrows with multi-arg fall through untouched', async () => { + test('map with index arg lowers via enumerate()', async () => { const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); - // Two-arg arrow — not lowered (the simple regex requires single arg) - expect(rewriteFastAPIExpr('arr.map((u, i) => u.name)', [])).toBe('arr.map((u, i) => u.name)'); + expect(rewriteFastAPIExpr('arr.map((u, i) => u.name)', [])).toBe('[u["name"] for i, u in enumerate(arr)]'); + }); + + test('arr-core dict member access lowers to subscript form', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + expect(rewriteFastAPIExpr('items.filter((x) => x.active)', [])).toBe('[x for x in items if x["active"]]'); + expect(rewriteFastAPIExpr('items.map((x) => x.n)', [])).toBe('[x["n"] for x in items]'); + expect(rewriteFastAPIExpr('items.find((x) => x.n === 2)', [])).toBe( + 'next((x for x in items if x["n"] == 2), None)', + ); + }); + + test('arr-core supports bare arrow params and nested dict member access', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + expect(rewriteFastAPIExpr('items.filter(x => x.active)', [])).toBe('[x for x in items if x["active"]]'); + expect(rewriteFastAPIExpr('items.map((x) => x.meta.tag)', [])).toBe('[x["meta"]["tag"] for x in items]'); + expect(rewriteFastAPIExpr('items.map((x, i) => x.n + i)', [])).toBe('[x["n"] + i for i, x in enumerate(items)]'); }); test('=== / !== are skipped when inside string literals (Codex P2)', async () => { @@ -2323,13 +2338,13 @@ 
describe('FastAPI Transpiler', () => { test('chained .filter().map() rewrites fully (Gemini #2)', async () => { const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); expect(rewriteFastAPIExpr('users.filter((u) => u.active).map((u) => u.name)', [])).toBe( - '[u.name for u in [u for u in users if u.active]]', + '[u["name"] for u in [u for u in users if u["active"]]]', ); }); test('arrow predicate with one level of nested parens (Gemini #3)', async () => { const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); - expect(rewriteFastAPIExpr('users.filter((u) => (u.age > 18))', [])).toBe('[u for u in users if (u.age > 18)]'); + expect(rewriteFastAPIExpr('users.filter((u) => (u.age > 18))', [])).toBe('[u for u in users if (u["age"] > 18)]'); }); test('undefined / null lower to None outside strings (Gemini #4)', async () => { From 26e72383c670a7991cdd9c012a2a312ffcf0c4f5 Mon Sep 17 00:00:00 2001 From: nicolascukas Date: Wed, 27 May 2026 10:54:51 +0200 Subject: [PATCH 3/6] feat(python): lower portable Array methods includes/indexOf/join/slice/some/every/reduce (arr-method) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Winner of the agon forge for the conformance-backfill arr-method task (codex, score 93; gemini also passed). 
Extends the task-01 balanced array-method scanner with the value/predicate-arg methods, reusing parseArrowCallback + lowerDictMemberAccess for the callback forms: arr.includes(v) -> (v in arr) arr.indexOf(v[, from]) -> next((__i for __i,__v in enumerate(arr) ...), -1) (-1-safe, no raise) arr.join(sep) -> sep.join(str(__v) for __v in arr) (JS str coercion) arr.slice(a, b) -> arr[a:b] (1-arg + negative forms) arr.some((x) => P) -> any(P for x in arr) arr.every((x) => P) -> all(P for x in arr) arr.reduce((a, b) => B, s) -> functools.reduce(lambda a, b: B, arr, s) (adds import functools) reduce defers its lambda `:` via a placeholder until after the dict-key-quoting pass so the colon isn't mistaken for a dict-key separator. includes/indexOf/slice are receiver-agnostic, so the string-receiver str-method fixtures for those also pass now (str-method 11→5 remaining). arr-method conformance: 10/10. arr-core still 9/9. Full fastapi suite green. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/python/src/fastapi-response.ts | 80 ++++++++++++++++++++++++- packages/python/tests/fastapi.test.ts | 35 +++++++++++ 2 files changed, 112 insertions(+), 3 deletions(-) diff --git a/packages/python/src/fastapi-response.ts b/packages/python/src/fastapi-response.ts index d9064ffc..4f54f893 100644 --- a/packages/python/src/fastapi-response.ts +++ b/packages/python/src/fastapi-response.ts @@ -154,8 +154,10 @@ function parseArrowCallback(inner: string): { params: string[]; body: string } | // receiver — the failure mode of the prior regex form. Member access on the // bound element is dict-subscripted so a list-of-dicts iterates correctly. 
const ARROW_ARRAY_METHODS = new Set(['filter', 'map', 'find']); +const PORTABLE_ARRAY_METHODS = new Set(['includes', 'indexOf', 'join', 'slice', 'some', 'every', 'reduce']); +const LAMBDA_COLON_PLACEHOLDER = '__KERN_LAMBDA_COLON__'; -function lowerJsArrayMethods(expr: string): string { +function lowerJsArrayMethods(expr: string, imports?: Set): string { let out = ''; let i = 0; let quote: string | null = null; @@ -193,7 +195,7 @@ function lowerJsArrayMethods(expr: string): string { const idxVar = arrow.params[1]; // Recurse for nested array methods in the body; subscript the element // var's member access. The index var (if any) stays a bare int. - const body = lowerJsArrayMethods(lowerDictMemberAccess(arrow.body, elemVar)); + const body = lowerJsArrayMethods(lowerDictMemberAccess(arrow.body, elemVar), imports); let lowered: string; if (method === 'filter') { lowered = `[${elemVar} for ${elemVar} in ${receiver} if ${body}]`; @@ -210,6 +212,77 @@ function lowerJsArrayMethods(expr: string): string { } } } + const mArray = expr.slice(i).match(/^\.([A-Za-z]\w*)\(/); + if (mArray && PORTABLE_ARRAY_METHODS.has(mArray[1])) { + const method = mArray[1]; + const openIdx = i + mArray[0].length - 1; + const closeIdx = matchBalancedParen(expr, openIdx); + const recvStart = findReceiverStart(out); + if (closeIdx !== -1 && recvStart !== -1) { + const receiver = out.slice(recvStart); + const pre = out.slice(0, recvStart); + const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => + lowerJsArrayMethods(a.trim(), imports), + ); + let lowered: string | null = null; + if (method === 'includes') { + const needle = args[0] ?? ''; + lowered = `(${needle} in ${receiver})`; + } else if (method === 'indexOf') { + const needle = args[0] ?? ''; + const fromIndex = args[1] ?? 
null; + if (fromIndex) { + lowered = `(next((__i for __i, __v in enumerate(${receiver}) if __i >= ${fromIndex} and __v == ${needle}), -1))`; + } else { + lowered = `(next((__i for __i, __v in enumerate(${receiver}) if __v == ${needle}), -1))`; + } + } else if (method === 'join') { + const sep = args[0] ?? '","'; + lowered = `${sep}.join(str(__v) for __v in ${receiver})`; + } else if (method === 'slice') { + const start = args[0]; + const end = args[1]; + if (!start && !end) lowered = `${receiver}[:]`; + else if (start && !end) lowered = `${receiver}[${start}:]`; + else if (!start && end) lowered = `${receiver}[:${end}]`; + else lowered = `${receiver}[${start}:${end}]`; + } else if (method === 'some' || method === 'every') { + const arrow = parseArrowCallback(expr.slice(openIdx + 1, closeIdx)); + if (arrow && arrow.params.length >= 1) { + const elemVar = arrow.params[0]; + let body = lowerDictMemberAccess(arrow.body, elemVar); + if (arrow.params[1]) body = lowerDictMemberAccess(body, arrow.params[1]); + const pred = lowerJsArrayMethods(body, imports); + lowered = + method === 'some' + ? `any(${pred} for ${elemVar} in ${receiver})` + : `all(${pred} for ${elemVar} in ${receiver})`; + } + } else if (method === 'reduce') { + const rawArgs = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)); + const arrow = parseArrowCallback(rawArgs[0] ?? 
''); + if (arrow && arrow.params.length >= 2) { + const accVar = arrow.params[0]; + const elemVar = arrow.params[1]; + let body = lowerDictMemberAccess(arrow.body, accVar); + body = lowerDictMemberAccess(body, elemVar); + const loweredBody = lowerJsArrayMethods(body, imports); + imports?.add('import functools'); + if (rawArgs.length >= 2) { + const seed = lowerJsArrayMethods(rawArgs[1].trim(), imports); + lowered = `functools.reduce(lambda ${accVar}, ${elemVar}${LAMBDA_COLON_PLACEHOLDER} ${loweredBody}, ${receiver}, ${seed})`; + } else { + lowered = `functools.reduce(lambda ${accVar}, ${elemVar}${LAMBDA_COLON_PLACEHOLDER} ${loweredBody}, ${receiver})`; + } + } + } + if (lowered) { + out = `${pre}${lowered}`; + i = closeIdx + 1; + continue; + } + } + } out += c; i += 1; } @@ -1175,7 +1248,7 @@ export function rewriteFastAPIExpr( // Array methods first (so any `===` inside an arrow body is hoisted into // a list-comprehension predicate that the strict-equality pass below // then catches). - result = lowerJsArrayMethods(result); + result = lowerJsArrayMethods(result, imports); // Strict equality: skip text inside quoted strings so a user message // like `"use === for strict equality"` doesn't get mangled to `==`. 
@@ -1243,6 +1316,7 @@ export function rewriteFastAPIExpr( for (const replacement of replacements) { result = result.split(replacement.placeholder).join(replacement.lowered); } + result = result.split(LAMBDA_COLON_PLACEHOLDER).join(':'); return result; } diff --git a/packages/python/tests/fastapi.test.ts b/packages/python/tests/fastapi.test.ts index b56d38b7..8ca1e8ec 100644 --- a/packages/python/tests/fastapi.test.ts +++ b/packages/python/tests/fastapi.test.ts @@ -2342,6 +2342,41 @@ describe('FastAPI Transpiler', () => { ); }); + test('arr-method lowerings rewrite to Python forms (and drop JS method syntax)', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + const out = rewriteFastAPIExpr( + 'nums.includes(2) && nums.indexOf(2) >= 0 && nums.join(",") == "1,2,3" && nums.slice(0, 2)', + [], + ); + expect(out).toContain('(2 in nums)'); + expect(out).toContain('next((__i for __i, __v in enumerate(nums) if __v == 2), -1)'); + expect(out).toContain('",".join(str(__v) for __v in nums)'); + expect(out).toContain('nums[0:2]'); + expect(out).not.toContain('nums.includes('); + expect(out).not.toContain('nums.indexOf('); + expect(out).not.toContain('nums.join('); + expect(out).not.toContain('nums.slice('); + }); + + test('arr-method callbacks some/every/reduce lower and reduce adds functools import', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + const imports = new Set(); + const out = rewriteFastAPIExpr( + 'nums.some((n) => n === 2) && nums.every((n) => n > 0) && nums.reduce((a, b) => a + b, 0)', + [], + new Set(), + false, + imports, + ); + expect(out).toContain('any(n == 2 for n in nums)'); + expect(out).toContain('all(n > 0 for n in nums)'); + expect(out).toContain('functools.reduce(lambda a, b: a + b, nums, 0)'); + expect(out).not.toContain('nums.some('); + expect(out).not.toContain('nums.every('); + expect(out).not.toContain('nums.reduce('); + expect(imports.has('import 
functools')).toBe(true); + }); + test('arrow predicate with one level of nested parens (Gemini #3)', async () => { const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); expect(rewriteFastAPIExpr('users.filter((u) => (u.age > 18))', [])).toBe('[u for u in users if (u["age"] > 18)]'); From 22c65edf10a0f921e670d10d7355cf1c5b37e877 Mon Sep 17 00:00:00 2001 From: nicolascukas Date: Wed, 27 May 2026 11:02:49 +0200 Subject: [PATCH 4/6] feat(python): lower portable String methods substring/padStart/padEnd/repeat + split-limit trap (str-method) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Winner of the agon forge for the conformance-backfill str-method task (claude, score 93; codex also passed). includes/indexOf/slice already lowered via the receiver-agnostic arr-method path; this adds the string-specific methods: s.padStart(n, fill) -> s.rjust(n, fill) (bare rename; default fill " ") s.padEnd(n, fill) -> s.ljust(n, fill) s.substring(a, b) -> s[a:b] (simple non-negative form) s.repeat(n) -> (s * n) s.split(sep, limit) -> s.split(sep)[:limit] THE TRAP: Python's 2nd split arg is maxsplit (keeps the remainder); JS keeps only the first `limit` parts. No-limit s.split(sep) left raw. Generalizes lowerStringReplaceFirstOnly → lowerStringArgMethods (one balanced, string-aware scan shared across replace/substring/repeat/split; no new char loop). substring's JS arg-swap/negative-clamp edge is scoped out with a comment (diverges from Python slicing); only the fixture's non-negative form is lowered. Full conformance: 84/84 (arr-core 9 + arr-method 10 + str-method 11 + the rest). Full fastapi suite green. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/python/src/fastapi-response.ts | 100 ++++++++++++++++++++---- packages/python/tests/fastapi.test.ts | 46 +++++++++++ 2 files changed, 132 insertions(+), 14 deletions(-) diff --git a/packages/python/src/fastapi-response.ts b/packages/python/src/fastapi-response.ts index 4f54f893..f4ae0675 100644 --- a/packages/python/src/fastapi-response.ts +++ b/packages/python/src/fastapi-response.ts @@ -633,11 +633,17 @@ function lowerNumberBuiltinCalls(expr: string, imports?: Set<string>): string { // x.toUpperCase() -> x.upper() // x.toLowerCase() -> x.lower() // x.trim() -> x.strip() -// Skip string literals so text like "a.toUpperCase()" stays unchanged. +// x.padStart(n[, fill]) -> x.rjust(n[, fill]) (JS/Python both default to " ") +// x.padEnd(n[, fill]) -> x.ljust(n[, fill]) +// Skip string literals so text like "a.toUpperCase()" / ".padStart(" stays raw. +// pad*/startsWith/endsWith take args, so only the method+`(` is matched and the +// argument list flows through to Python unchanged. Note: JS pad* accept a +// multi-char fill while Python rjust/ljust require a single fill char — only the +// 1-char fixture form is in scope; a multi-char fill is left to raise on Python. function lowerStringBuiltinCalls(expr: string): string { return expr.replace( new RegExp( - `${STRING_LITERAL_ALT}|\\.toUpperCase\\(\\)|\\.toLowerCase\\(\\)|\\.trim\\(\\)|\\.startsWith\\(|\\.endsWith\\(`, + `${STRING_LITERAL_ALT}|\\.toUpperCase\\(\\)|\\.toLowerCase\\(\\)|\\.trim\\(\\)|\\.startsWith\\(|\\.endsWith\\(|\\.padStart\\(|\\.padEnd\\(`, 'g', ), (match) => { @@ -648,18 +654,33 @@ function lowerStringBuiltinCalls(expr: string): string { // argument list passes through to Python's str.startswith/endswith.
if (match === '.startsWith(') return '.startswith('; if (match === '.endsWith(') return '.endswith('; + if (match === '.padStart(') return '.rjust('; + if (match === '.padEnd(') return '.ljust('; return match; }, ); } -// Lower JS String.prototype.replace (string-arg form) to Python's first-only -// replace. JS `s.replace("a", "b")` replaces only the FIRST occurrence, but -// Python str.replace replaces ALL — so emit the count=1 third arg for parity. -// Only the 2-arg form is lowered; a regex first arg (`s.replace(/re/, b)`) is -// out of scope and left unchanged. `.replaceAll(` never matches (it isn't -// `.replace(`). String-aware + balanced so args with commas/parens survive. -function lowerStringReplaceFirstOnly(expr: string): string { +// Lower the argument-taking JS String methods that need more than a bare method +// rename (those are handled by lowerStringBuiltinCalls). One string-aware, +// balanced scan reused for all of them; the shared matchBalancedParen / +// splitTopLevelArgs / findReceiverStart helpers carve out args and receiver so +// no new char-loop matcher is introduced: +// s.replace("a", "b") -> s.replace("a", "b", 1) (JS replaces FIRST only, +// Python str.replace replaces ALL — pin count=1) +// s.substring(a, b) -> s[a:b] (and s.substring(a) -> s[a:]) +// s.repeat(n) -> (s * n) +// s.split(sep, limit) -> s.split(sep)[:limit] THE TRAP: Python's 2nd +// arg is maxsplit, which KEEPS the remainder; JS +// keeps only the first `limit` parts. The no-limit +// s.split(sep) form is left raw (Python matches JS). +// replace: only the 2-arg, non-regex form is lowered (`s.replace(/re/, b)` is +// out of scope); `.replaceAll(` never matches. A quoted `".repeat("` etc. is +// skipped by the quote tracking, so string-literal text stays raw. +// substring edge (scoped out): JS substring clamps negative args to 0 and SWAPS +// them when a > b; Python slicing does neither. 
Only the simple non-negative +// fixture case is lowered — a negative/swapped substring would diverge. +function lowerStringArgMethods(expr: string): string { let out = ''; let i = 0; let quote: string | null = null; @@ -688,14 +709,64 @@ function lowerStringReplaceFirstOnly(expr: string): string { if (closeIdx !== -1) { const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)); if (args.length === 2 && !args[0].trim().startsWith('/')) { - const a0 = lowerStringReplaceFirstOnly(args[0]).trim(); - const a1 = lowerStringReplaceFirstOnly(args[1]).trim(); + const a0 = lowerStringArgMethods(args[0]).trim(); + const a1 = lowerStringArgMethods(args[1]).trim(); out += `.replace(${a0}, ${a1}, 1)`; i = closeIdx + 1; continue; } } } + if (expr.startsWith('.substring(', i)) { + const openIdx = i + '.substring('.length - 1; + const closeIdx = matchBalancedParen(expr, openIdx); + if (closeIdx !== -1) { + // Receiver is already in `out`; `s.substring(a, b)` and `s[a:b]` both + // trail the receiver, so just append the slice — no receiver surgery. + const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => + lowerStringArgMethods(a).trim(), + ); + const start = args[0] ?? ''; + const end = args[1] ?? ''; + out += `[${start}:${end}]`; + i = closeIdx + 1; + continue; + } + } + if (expr.startsWith('.repeat(', i)) { + const openIdx = i + '.repeat('.length - 1; + const closeIdx = matchBalancedParen(expr, openIdx); + if (closeIdx !== -1) { + const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => + lowerStringArgMethods(a).trim(), + ); + const n = args[0] ?? 
'0'; + const receiverStart = findReceiverStart(out); + if (receiverStart !== -1) { + const receiver = out.slice(receiverStart); + const pre = out.slice(0, receiverStart); + out = `${pre}(${receiver} * ${n})`; + i = closeIdx + 1; + continue; + } + } + } + if (expr.startsWith('.split(', i)) { + const openIdx = i + '.split('.length - 1; + const closeIdx = matchBalancedParen(expr, openIdx); + if (closeIdx !== -1) { + const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => + lowerStringArgMethods(a).trim(), + ); + // Only the 2-arg limit form needs rewriting; the no-limit form is left + // raw (falls through) because Python str.split(sep) already matches JS. + if (args.length === 2) { + out += `.split(${args[0]})[:${args[1]}]`; + i = closeIdx + 1; + continue; + } + } + } out += c; i += 1; } @@ -1300,10 +1371,11 @@ export function rewriteFastAPIExpr( result = lowerMathBuiltinCalls(result, imports); // Number parsing and formatting builtins. result = lowerNumberBuiltinCalls(result, imports); - // String builtins in portable expressions. + // String builtins in portable expressions (bare renames + pad → rjust/ljust). result = lowerStringBuiltinCalls(result); - // String .replace → first-only parity (JS replaces first; Python replaces all). - result = lowerStringReplaceFirstOnly(result); + // Argument-taking string methods: replace (first-only), substring → slice, + // repeat → `*`, and the split(sep, limit) maxsplit trap. + result = lowerStringArgMethods(result); // Object/Array/Date host builtins in portable expressions. result = lowerObjectArrayDateBuiltinCalls(result, imports); diff --git a/packages/python/tests/fastapi.test.ts b/packages/python/tests/fastapi.test.ts index 8ca1e8ec..1d02d548 100644 --- a/packages/python/tests/fastapi.test.ts +++ b/packages/python/tests/fastapi.test.ts @@ -2425,6 +2425,52 @@ describe('FastAPI Transpiler', () => { // this would still be a property access — preserve. 
expect(rewriteFastAPIExpr('a.b.true', [])).toBe('a.b.true'); }); + + // ── str-method portability (task 03-str-method): substring/pad*/repeat + + // the split(sep, limit) maxsplit trap. includes/indexOf/slice already + // covered above (receiver-agnostic, shared with arr-method). ────────── + test('substring lowers to a Python slice (JS form dropped)', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + expect(rewriteFastAPIExpr('s.substring(0, 2)', [])).toBe('s[0:2]'); + expect(rewriteFastAPIExpr('s.substring(2)', [])).toBe('s[2:]'); + expect(rewriteFastAPIExpr('s.substring(0, 2)', [])).not.toContain('.substring('); + }); + + test('padStart/padEnd lower to rjust/ljust (default + explicit fill)', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + expect(rewriteFastAPIExpr('s.padStart(8, "0")', [])).toBe('s.rjust(8, "0")'); + expect(rewriteFastAPIExpr('s.padEnd(8, "0")', [])).toBe('s.ljust(8, "0")'); + // 1-arg form: JS pads with " " and Python rjust/ljust default to " " too. + expect(rewriteFastAPIExpr('s.padStart(8)', [])).toBe('s.rjust(8)'); + const out = rewriteFastAPIExpr('s.padStart(8, "0")', []); + expect(out).not.toContain('.padStart('); + expect(out).not.toContain('.padEnd('); + }); + + test('repeat lowers to a parenthesized `*` (receiver pulled in front)', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + expect(rewriteFastAPIExpr('s.repeat(2)', [])).toBe('(s * 2)'); + // Bracket-access receiver is carried into the product too. 
+ expect(rewriteFastAPIExpr('data["k"].repeat(3)', [])).toBe('(data["k"] * 3)'); + expect(rewriteFastAPIExpr('s.repeat(2)', [])).not.toContain('.repeat('); + }); + + test('split(sep, limit) is the maxsplit TRAP → split(sep)[:limit], not maxsplit', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + // JS keeps the FIRST `limit` parts; Python's 2nd arg is maxsplit (keeps + // the remainder), so the only correct lowering truncates AFTER a full split. + expect(rewriteFastAPIExpr('s.split(",", 2)', [])).toBe('s.split(",")[:2]'); + // Must NOT emit the maxsplit form `s.split(",", 2)`. + expect(rewriteFastAPIExpr('s.split(",", 2)', [])).not.toBe('s.split(",", 2)'); + // The existing no-limit form stays raw (Python str.split matches JS). + expect(rewriteFastAPIExpr('s.split(",")', [])).toBe('s.split(",")'); + }); + + test('a quoted ".repeat(" / ".substring(" inside a string literal stays raw', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + expect(rewriteFastAPIExpr('msg = "call .repeat( in docs"', [])).toBe('msg = "call .repeat( in docs"'); + expect(rewriteFastAPIExpr("note = 'use .substring(0, 2) here'", [])).toBe("note = 'use .substring(0, 2) here'"); + }); }); // ── Raw-JS handler guard — portable + stream paths ───────────────── From 56d3e2d1ce7f6384caf68f0901f679136807d770 Mon Sep 17 00:00:00 2001 From: nicolascukas Date: Wed, 27 May 2026 11:08:55 +0200 Subject: [PATCH 5/6] fix(python): bind callback index var for filter/find/some + subscript data fields before a method call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses two latent bugs codex review flagged on the arr-core scanner (ab192611) — both outside the committed conformance fixtures: 1. filter/find/some with a second (index) callback param left the index unbound: `items.filter((x, i) => i > 0)` emitted `[x for x in items if i > 0]` → NameError. 
Now every arrow method binds the index via enumerate() when a second param is present, matching map. 2. lowerDictMemberAccess left a whole member chain as attribute access when it was followed by `(`, so a data field before a method wasn't subscripted and nested array methods broke on dicts. Now the leading data fields are subscripted and only the final method segment stays attribute access: x.name.toUpperCase() -> x["name"].upper() x.tags.map((t) => t.name) -> [t["name"] for t in x["tags"]] (nested) Adds regression tests for both. Full conformance still 84/84; fastapi suite green. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/python/src/fastapi-response.ts | 48 ++++++++++++++----------- packages/python/tests/fastapi.test.ts | 24 +++++++++++++ 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/packages/python/src/fastapi-response.ts b/packages/python/src/fastapi-response.ts index f4ae0675..f29c3cfb 100644 --- a/packages/python/src/fastapi-response.ts +++ b/packages/python/src/fastapi-response.ts @@ -108,7 +108,14 @@ function lowerDictMemberAccess(text: string, varName: string): string { } if (fields.length > 0) { if (text[k] === '(') { - out += text.slice(i, k); // method call — leave for the string pass + // Method call: subscript the leading DATA fields but keep the final + // segment as attribute access (the method name) so the string-method + // / nested-array passes still see it — `x.name.toUpperCase()` → + // `x["name"].toUpperCase()`, `x.tags.map(...)` → `x["tags"].map(...)` + // (codex review of ab192611). A lone `x.method()` is unchanged. 
+ const dataFields = fields.slice(0, -1); + const methodField = fields[fields.length - 1]; + out += varName + dataFields.map((field) => `[${JSON.stringify(field)}]`).join('') + `.${methodField}`; } else { out += varName + fields.map((field) => `[${JSON.stringify(field)}]`).join(''); } @@ -196,15 +203,19 @@ function lowerJsArrayMethods(expr: string, imports?: Set<string>): string { // Recurse for nested array methods in the body; subscript the element // var's member access. The index var (if any) stays a bare int. const body = lowerJsArrayMethods(lowerDictMemberAccess(arrow.body, elemVar), imports); + // A second callback param is the element index — bind it via + // enumerate() for every method, not just map, so a predicate that + // references the index (`(x, i) => i > 0`) doesn't emit an unbound + // name (codex review of ab192611). + const loopTarget = idxVar ? `${idxVar}, ${elemVar}` : elemVar; + const source = idxVar ? `enumerate(${receiver})` : receiver; let lowered: string; if (method === 'filter') { - lowered = `[${elemVar} for ${elemVar} in ${receiver} if ${body}]`; + lowered = `[${elemVar} for ${loopTarget} in ${source} if ${body}]`; } else if (method === 'find') { - lowered = `next((${elemVar} for ${elemVar} in ${receiver} if ${body}), None)`; - } else if (idxVar) { - lowered = `[${body} for ${idxVar}, ${elemVar} in enumerate(${receiver})]`; + lowered = `next((${elemVar} for ${loopTarget} in ${source} if ${body}), None)`; } else { - lowered = `[${body} for ${elemVar} in ${receiver}]`; + lowered = `[${body} for ${loopTarget} in ${source}]`; } out = `${pre}${lowered}`; i = closeIdx + 1; @@ -250,13 +261,16 @@ function lowerJsArrayMethods(expr: string, imports?: Set<string>): string { const arrow = parseArrowCallback(expr.slice(openIdx + 1, closeIdx)); if (arrow && arrow.params.length >= 1) { const elemVar = arrow.params[0]; - let body = lowerDictMemberAccess(arrow.body, elemVar); - if (arrow.params[1]) body = lowerDictMemberAccess(body, arrow.params[1]); - const pred =
lowerJsArrayMethods(body, imports); + const idxVar = arrow.params[1]; + // Only the element var is dict-subscripted; the index var stays a + // bare int and must be bound via enumerate() when present. + const pred = lowerJsArrayMethods(lowerDictMemberAccess(arrow.body, elemVar), imports); + const loopTarget = idxVar ? `${idxVar}, ${elemVar}` : elemVar; + const source = idxVar ? `enumerate(${receiver})` : receiver; lowered = method === 'some' - ? `any(${pred} for ${elemVar} in ${receiver})` - : `all(${pred} for ${elemVar} in ${receiver})`; + ? `any(${pred} for ${loopTarget} in ${source})` + : `all(${pred} for ${loopTarget} in ${source})`; } } else if (method === 'reduce') { const rawArgs = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)); @@ -723,9 +737,7 @@ function lowerStringArgMethods(expr: string): string { if (closeIdx !== -1) { // Receiver is already in `out`; `s.substring(a, b)` and `s[a:b]` both // trail the receiver, so just append the slice — no receiver surgery. - const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => - lowerStringArgMethods(a).trim(), - ); + const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => lowerStringArgMethods(a).trim()); const start = args[0] ?? ''; const end = args[1] ?? ''; out += `[${start}:${end}]`; @@ -737,9 +749,7 @@ function lowerStringArgMethods(expr: string): string { const openIdx = i + '.repeat('.length - 1; const closeIdx = matchBalancedParen(expr, openIdx); if (closeIdx !== -1) { - const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => - lowerStringArgMethods(a).trim(), - ); + const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => lowerStringArgMethods(a).trim()); const n = args[0] ?? 
'0'; const receiverStart = findReceiverStart(out); if (receiverStart !== -1) { @@ -755,9 +765,7 @@ function lowerStringArgMethods(expr: string): string { const openIdx = i + '.split('.length - 1; const closeIdx = matchBalancedParen(expr, openIdx); if (closeIdx !== -1) { - const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => - lowerStringArgMethods(a).trim(), - ); + const args = splitTopLevelArgs(expr.slice(openIdx + 1, closeIdx)).map((a) => lowerStringArgMethods(a).trim()); // Only the 2-arg limit form needs rewriting; the no-limit form is left // raw (falls through) because Python str.split(sep) already matches JS. if (args.length === 2) { diff --git a/packages/python/tests/fastapi.test.ts b/packages/python/tests/fastapi.test.ts index 1d02d548..79c477fd 100644 --- a/packages/python/tests/fastapi.test.ts +++ b/packages/python/tests/fastapi.test.ts @@ -2323,6 +2323,30 @@ describe('FastAPI Transpiler', () => { expect(rewriteFastAPIExpr('items.map((x, i) => x.n + i)', [])).toBe('[x["n"] + i for i, x in enumerate(items)]'); }); + test('arr-core filter/find/some with an index param bind it via enumerate (codex review ab192611)', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + // The index var was previously unbound for filter/find/some → NameError. 
+ expect(rewriteFastAPIExpr('items.filter((x, i) => i > 0)', [])).toBe('[x for i, x in enumerate(items) if i > 0]'); + expect(rewriteFastAPIExpr('items.find((x, i) => i === 1)', [])).toBe( + 'next((x for i, x in enumerate(items) if i == 1), None)', + ); + expect(rewriteFastAPIExpr('items.some((x, i) => i > 0)', [])).toBe('any(i > 0 for i, x in enumerate(items))'); + }); + + test('arr-core subscripts data fields before a method call + nested map (codex review ab192611)', async () => { + const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); + // Data field before a method must be subscripted; the method segment is + // kept (and itself lowered): x.name.toUpperCase() → x["name"].upper(). + expect(rewriteFastAPIExpr('items.map((x) => x.name.toUpperCase())', [])).toBe( + '[x["name"].upper() for x in items]', + ); + // Nested array method: the inner receiver x.tags must be subscripted so + // the inner comprehension iterates a dict field, not an attribute. + expect(rewriteFastAPIExpr('items.map((x) => x.tags.map((t) => t.name))', [])).toBe( + '[[t["name"] for t in x["tags"]] for x in items]', + ); + }); + test('=== / !== are skipped when inside string literals (Codex P2)', async () => { const { rewriteFastAPIExpr } = await import('../src/fastapi-response.js'); expect(rewriteFastAPIExpr('label = "use === for strict equality"', [])).toBe( From a893c3e93f7c26874ed591d610b3fcd040245de1 Mon Sep 17 00:00:00 2001 From: nicolascukas Date: Wed, 27 May 2026 13:31:39 +0200 Subject: [PATCH 6/6] =?UTF-8?q?ci(parity):=20gate=20Python=E2=86=94Express?= =?UTF-8?q?=20differential=20conformance=20in=20CI=20+=20pre-push?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The conformance harness (scripts/conformance.mjs) executes portable KERN expressions through BOTH the Python (FastAPI) and Express (JS) codegen paths and diffs the runtime results — the gate that caught the host-builtin and array/string-method portability bug 
cliffs. Until now it ran nowhere automatic; an unrun parity gate is theater, so wire it in: - package.json: `check:conformance` (build core/python/express + run the harness), mirroring `check:python-codegen`. - ci.yml: a "Differential conformance (Python↔Express parity)" step after the codegen quality gate. Unlike that step (ast-valid but semantically blind), this proves behavioral parity by executing the generated artifacts. - pre-push.mjs: gated `runConformance()` — fires only when @kernlang/core, /python, or /express are affected; `KERN_PRE_PUSH_SKIP_CONFORMANCE=1` escape hatch. Runs before the heavier scoped review so a regression fails fast. Verified green on clean dev (gate won't block the base); pre-push unit tests 11/0; trigger logic confirmed per-scenario (python/express/core fire, unrelated skip). Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/ci.yml | 10 ++++++++++ package.json | 1 + scripts/pre-push.mjs | 20 ++++++++++++++++++++ 3 files changed, 31 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc4f9d04..dc0e5f8b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,6 +53,16 @@ jobs: - name: Python codegen quality gate run: pnpm check:python-codegen + # Differential parity gate: executes portable KERN expressions through BOTH + # the Python (FastAPI) and Express (JS) codegen paths and diffs the runtime + # results. Unlike the codegen quality gate above (ast-valid but semantically + # blind), this proves Python<->TS behavioral parity by actually running the + # generated artifacts -- it is what caught the host-builtin and array/string + # method portability bug cliffs. Needs python3 (set up above) + the built + # core/python/express dist (the script builds them). 
+ - name: Differential conformance (Python<->Express parity) + run: pnpm check:conformance + - name: Test (excluding IR-semantics harness) run: pnpm test:non-semantics diff --git a/package.json b/package.json index 68136c1c..28939f95 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ "test:non-semantics": "pnpm -r --filter '!kern-monorepo' --filter '!@kernlang/review-python' test --testPathIgnorePatterns=ir-semantics && pnpm test:prepush && pnpm check:rule-coverage", "check:rule-coverage": "node ./scripts/check-rule-coverage.mjs", "check:python-codegen": "pnpm --filter @kernlang/core --filter @kernlang/python build && node ./scripts/lift-rate-python.mjs --check", + "check:conformance": "pnpm --filter @kernlang/core --filter @kernlang/python --filter @kernlang/express build && node ./scripts/conformance.mjs", "docs:contracts": "pnpm --filter @kernlang/core build && node ./scripts/generate-ir-semantics-docs.mjs --format=markdown --out=-", "docs:contracts:json": "pnpm --filter @kernlang/core build && node ./scripts/generate-ir-semantics-docs.mjs --format=json --out=generated/contracts/registry.json", "docs:contracts:check": "pnpm --filter @kernlang/core build && node ./scripts/check-contract-docs.mjs", diff --git a/scripts/pre-push.mjs b/scripts/pre-push.mjs index 6d9b7afb..5777f51f 100755 --- a/scripts/pre-push.mjs +++ b/scripts/pre-push.mjs @@ -22,6 +22,11 @@ const ROOT_WIDE_FILES = new Set([ 'tsconfig.json', ]); const ROOT_WIDE_PREFIXES = ['scripts/']; +// Packages whose codegen the differential conformance harness exercises. When +// any is in the affected set, run `check:conformance` so a Python↔Express +// parity regression is caught before the push (the harness executes both +// generated artifacts and diffs the results). 
+const CONFORMANCE_PACKAGES = new Set(['@kernlang/core', '@kernlang/python', '@kernlang/express']); function run(command, args, options = {}) { const proc = spawnSync(command, args, { @@ -248,6 +253,18 @@ function runScopedReview(packages) { } } +function runConformance(packages) { + if (process.env.KERN_PRE_PUSH_SKIP_CONFORMANCE === '1') { + console.log('[pre-push] differential conformance skipped by KERN_PRE_PUSH_SKIP_CONFORMANCE=1.'); + return; + } + const triggers = packages.filter((pkg) => CONFORMANCE_PACKAGES.has(pkg.name)); + if (triggers.length === 0) return; + + console.log(`[pre-push] differential conformance (Python↔Express) — triggered by ${triggers.map((pkg) => pkg.name).join(', ')}...`); + run('pnpm', ['check:conformance']); +} + export function main() { try { const changedFiles = prePushChangedFiles(); @@ -264,6 +281,9 @@ export function main() { console.log(`[pre-push] affected packages: ${affected.map((pkg) => pkg.name).join(', ')}`); runPackageScript(affected, 'build'); runPackageScript(affected, 'test'); + // Run the cross-target parity gate before the (heavier) scoped review so a + // portability regression fails fast. + runConformance(affected); runScopedReview(affected); console.log('[pre-push] scoped checks passed.');