From 8d34f79e15ada2880d799473b624291f476936c3 Mon Sep 17 00:00:00 2001
From: Declade <110547349+Declade@users.noreply.github.com>
Date: Sun, 17 May 2026 11:22:14 +0200
Subject: [PATCH 1/5] =?UTF-8?q?feat(slice-2):=20core=20modules=20=E2=80=94?=
 =?UTF-8?q?=20gateway=20client,=20redaction=20extractor,=20recall,=20mocks?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the in-process methodology library Slice 2 needs:

- src/gateway-client.ts — typed wrapper around POST /api/v1/proxy/messages
  with mode=proving_ground. 2-retry exponential backoff with jitter on 5xx +
  connection errors; no retry on 4xx; per-request timeout (30s default,
  LUCAIRN_REQUEST_TIMEOUT_MS env-configurable). Env reads are call-time only
  — module import is side-effect-free.
- src/redaction-extractor.ts — pure converter from gateway proving-ground
  matches/missed/extras into a flat ExtractedRedaction[] tagged with HIPAA
  Safe Harbor category + verdict (tp/fn/fp). Unmapped extras carry
  hipaa_category=null so the FP count is preserved while taxonomy drift is
  observable.
- src/hipaa-category-mapping.ts — explicit one-way map from Lucairn
  sanitizer internal taxonomy ([PERSON_N], [LOCATION_N], …) to the 18 HIPAA
  Safe Harbor categories (45 CFR § 164.514(b)(2)(i)). Placeholder-parsing
  mirrors gateway extractEntityTypes (proxy.go:1361-1395).
- src/recall.ts — two consumer paths: aggregateExtracted() consumes
  gateway-attested verdicts (the harness's live path; arm's-length property
  preserved because matching runs inside the gateway, not in code Lucairn
  authored alongside the publication); computeRecallFromSpans() implements
  the ≥50%-character-overlap span matching that the Slice 2 brief locks in for
  any future raw-span inline surface. Both produce the same RecallSummary shape.
- src/mocks/gateway-fixtures.ts — deterministic mock builders for msw-backed
  unit tests + --mock smoke scripts. Configurable missRate + spuriousFpCount
  exercise recall paths against known oracles.
- src/index.ts — barrel exports for the public surface.
- package.json — adds msw ^2.14.6 devDependency. No new runtime deps.

Cite-back for gateway response shape: proxy.go:35-58 (request schema),
proxy.go:361-373 (mode + activity validation), proxy.go:1068-1080
(ground_truth_evaluation emission), ground_truth.go:5-138 (result shape).
---
 package.json                  |   1 +
 pnpm-lock.yaml                | 395 ++++++++++++++++++++++++++++++++++
 src/gateway-client.ts         | 391 +++++++++++++++++++++++++++++++++
 src/hipaa-category-mapping.ts | 146 +++++++++++++
 src/index.ts                  |  61 +++++-
 src/mocks/gateway-fixtures.ts | 198 +++++++++++++++++
 src/recall.ts                 | 359 ++++++++++++++++++++++++++++++
 src/redaction-extractor.ts    | 127 +++++++++++
 8 files changed, 1677 insertions(+), 1 deletion(-)
 create mode 100644 src/gateway-client.ts
 create mode 100644 src/hipaa-category-mapping.ts
 create mode 100644 src/mocks/gateway-fixtures.ts
 create mode 100644 src/recall.ts
 create mode 100644 src/redaction-extractor.ts

diff --git a/package.json b/package.json
index b4563ca..5279c6e 100644
--- a/package.json
+++ b/package.json
@@ -25,6 +25,7 @@
   "devDependencies": {
     "@faker-js/faker": "^9.0.0",
     "@types/node": "^20.11.0",
+    "msw": "^2.14.6",
     "tsx": "^4.22.0",
     "typescript": "^5.4.0",
     "vitest": "^1.6.0"
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 7b5d5fc..83d46fc 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -14,6 +14,9 @@ importers:
       '@types/node':
         specifier: ^20.11.0
         version: 20.19.41
+      msw:
+        specifier: ^2.14.6
+        version: 2.14.6(@types/node@20.19.41)(typescript@5.9.3)
       tsx:
         specifier: ^4.22.0
         version: 4.22.1
@@ -324,6 +327,41 @@ packages:
     resolution: {integrity: sha512-OEl393iCOoo/z8bMezRlJu+GlRGlsKbUAN7jKB6LhnKoqKve5DXRpalbItIIcwnCjs1k/FOPjFzcA6Qn+H+YbA==}
     engines: {node: '>=18.0.0', npm: '>=9.0.0'}
 
+  '@inquirer/ansi@2.0.5':
+    resolution: {integrity: sha512-doc2sWgJpbFQ64UflSVd17ibMGDuxO1yKgOgLMwavzESnXjFWJqUeG8saYosqKpHp4kWiM5x1nXvEjbpx90gzw==}
+    engines: {node: '>=23.5.0 || ^22.13.0 || ^21.7.0 || ^20.12.0'}
+
+  '@inquirer/confirm@6.0.13':
+    resolution: {integrity: sha512-wkGPC7yJ5WJk1DJ5SX7fzk+gfj4BM8cf5dDDi71B/551xHrdsZVRJOC0WyikXd0pEsb/9cLniuE4atbsMqmFkw==}
+    engines: {node: '>=23.5.0 || ^22.13.0 || ^21.7.0 || ^20.12.0'}
+    peerDependencies:
+      '@types/node': '>=18'
+    peerDependenciesMeta:
+      '@types/node':
+        optional: true
+
+  '@inquirer/core@11.1.10':
+    resolution: {integrity: sha512-a4Q5BXHQAHa9eO202sTaFCHFYVB3x5fauDuThEAdZ9gfn76pSxiKU7wWcEH0N1O0XmQvNfQNU6QXpiRxmYQx+A==}
+    engines: {node: '>=23.5.0 || ^22.13.0 || ^21.7.0 || ^20.12.0'}
+    peerDependencies:
+      '@types/node': '>=18'
+    peerDependenciesMeta:
+      '@types/node':
+        optional: true
+
+  '@inquirer/figures@2.0.5':
+    resolution: {integrity: sha512-NsSs4kzfm12lNetHwAn3GEuH317IzpwrMCbOuMIVytpjnJ90YYHNwdRgYGuKmVxwuIqSgqk3M5qqQt1cDk0tGQ==}
+    engines: {node: '>=23.5.0 || ^22.13.0 || ^21.7.0 || ^20.12.0'}
+
+  '@inquirer/type@4.0.5':
+    resolution: {integrity: sha512-aetVUNeKNc/VriqXlw1NRSW0zhMBB0W4bNbWRJgzRl/3d0QNDQFfk0GO5SDdtjMZVg6o8ZKEiadd7SCCzoOn5Q==}
+    engines: {node: '>=23.5.0 || ^22.13.0 || ^21.7.0 || ^20.12.0'}
+    peerDependencies:
+      '@types/node': '>=18'
+    peerDependenciesMeta:
+      '@types/node':
+        optional: true
+
   '@jest/schemas@29.6.3':
     resolution: {integrity: sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==}
     engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
@@ -331,6 +369,22 @@ packages:
   '@jridgewell/sourcemap-codec@1.5.5':
     resolution: {integrity: sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==}
 
+  '@mswjs/interceptors@0.41.9':
+    resolution: {integrity: sha512-VVPPgHyQ6ShqnrmDWuxjmUIsO9gWyOZFmuOfLd9LfBGQJwZfy0gvv9pbHSJuoFNIYC7ZDX9aoFwowjcdSC4E8w==}
+    engines: {node: '>=18'}
+
+  '@open-draft/deferred-promise@2.2.0':
+    resolution: {integrity: sha512-CecwLWx3rhxVQF6V4bAgPS5t+So2sTbPgAzafKkVizyi7tlwpcFpdFqq+wqF2OwNBmqFuu6tOyouTuxgpMfzmA==}
+
+  '@open-draft/deferred-promise@3.0.0':
+    resolution: {integrity: sha512-XW375UK8/9SqUVNVa6M0yEy8+iTi4QN5VZ7aZuRFQmy76LRwI9wy5F4YIBU6T+eTe2/DNDo8tqu8RHlwLHM6RA==}
+
+  '@open-draft/logger@0.3.0':
+    resolution: {integrity: sha512-X2g45fzhxH238HKO4xbSr7+wBS8Fvw6ixhTDuvLd5mqh6bJJCFAPwU9mPDxbcrRtfxv4u5IHCEH77BmxvXmmxQ==}
+
+  '@open-draft/until@2.1.0':
+    resolution: {integrity: sha512-U69T3ItWHvLwGg5eJ0n3I62nWuE6ilHlmz7zM0npLBRvPRd7e6NYmg54vvRtP5mZG7kZqZCFVdsTWo7BPtBujg==}
+
   '@rollup/rollup-android-arm-eabi@4.60.4':
     resolution: {integrity: sha512-F5QXMSiFebS9hKZj02XhWLLnRpJ3B3AROP0tWbFBSj+6kCbg5m9j5JoHKd4mmSVy5mS/IMQloYgYxCuJC0fxEQ==}
     cpu: [arm]
@@ -481,6 +535,12 @@ packages:
   '@types/node@20.19.41':
     resolution: {integrity: sha512-ECymXOukMnOoVkC2bb1Vc/w/836DXncOg5m8Xj1RH7xSHZJWNYY6Zh7EH477vcnD5egKNNfy2RpNOmuChhFPgQ==}
 
+  '@types/set-cookie-parser@2.4.10':
+    resolution: {integrity: sha512-GGmQVGpQWUe5qglJozEjZV/5dyxbOOZ0LHe/lqyWssB88Y4svNfst0uqBVscdDeIKl5Jy5+aPSvy7mI9tYRguw==}
+
+  '@types/statuses@2.0.6':
+    resolution: {integrity: sha512-xMAgYwceFhRA2zY+XbEA7mxYbA093wdiW8Vu6gZPGWy9cmOyU9XesH1tNcEWsKFd5Vzrqx5T3D38PWx1FIIXkA==}
+
   '@vitest/expect@1.6.1':
     resolution: {integrity: sha512-jXL+9+ZNIJKruofqXuuTClf44eSpcHlgj3CiuNihUF3Ioujtmc0zIa3UJOW5RjDK1YLBJZnWBlPuqhYycLioog==}
 
@@ -505,6 +565,14 @@ packages:
     engines: {node: '>=0.4.0'}
     hasBin: true
 
+  ansi-regex@5.0.1:
+    resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==}
+    engines: {node: '>=8'}
+
+  ansi-styles@4.3.0:
+    resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==}
+    engines: {node: '>=8'}
+
   ansi-styles@5.2.0:
     resolution: {integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==}
     engines: {node: '>=10'}
@@ -523,9 +591,28 @@ packages:
   check-error@1.0.3:
     resolution: {integrity: sha512-iKEoDYaRmd1mxM90a2OEfWhjsjPpYPuQ+lMYsoxB126+t8fw7ySEO48nmDg5COTjxDI65/Y2OWpeEHk3ZOe8zg==}
 
+  cli-width@4.1.0:
+    resolution: {integrity: sha512-ouuZd4/dm2Sw5Gmqy6bGyNNNe1qt9RpmxveLSO7KcgsTnU7RXfsw+/bukWGo1abgBiMAic068rclZsO4IWmmxQ==}
+    engines: {node: '>= 12'}
+
+  cliui@8.0.1:
+    resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==}
+    engines: {node: '>=12'}
+
+  color-convert@2.0.1:
+    resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==}
+    engines: {node: '>=7.0.0'}
+
+  color-name@1.1.4:
+    resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==}
+
   confbox@0.1.8:
     resolution: {integrity: sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==}
 
+  cookie@1.1.1:
+    resolution: {integrity: sha512-ei8Aos7ja0weRpFzJnEA9UHJ/7XQmqglbRwnf2ATjcB9Wq874VKH9kfjjirM6UhU2/E5fFYadylyhFldcqSidQ==}
+    engines: {node: '>=18'}
+
   cross-spawn@7.0.6:
     resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
     engines: {node: '>= 8'}
@@ -547,6 +634,9 @@ packages:
     resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==}
     engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0}
 
+  emoji-regex@8.0.0:
+    resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==}
+
   esbuild@0.21.5:
     resolution: {integrity: sha512-mg3OPMV4hXywwpoDxu3Qda5xCKQi+vCTZq8S9J/EpkhB2HzKXq4SNFZE3+NK93JYxc8VMSep+lOUSC/RVKaBqw==}
     engines: {node: '>=12'}
@@ -557,6 +647,10 @@ packages:
     engines: {node: '>=18'}
     hasBin: true
 
+  escalade@3.2.0:
+    resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==}
+    engines: {node: '>=6'}
+
   estree-walker@3.0.3:
     resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==}
 
@@ -564,11 +658,24 @@ packages:
     resolution: {integrity: sha512-VyhnebXciFV2DESc+p6B+y0LjSm0krU4OgJN44qFAhBY0TJ+1V61tYD2+wHusZ6F9n5K+vl8k0sTy7PEfV4qpg==}
     engines: {node: '>=16.17'}
 
+  fast-string-truncated-width@3.0.3:
+    resolution: {integrity: sha512-0jjjIEL6+0jag3l2XWWizO64/aZVtpiGE3t0Zgqxv0DPuxiMjvB3M24fCyhZUO4KomJQPj3LTSUnDP3GpdwC0g==}
+
+  fast-string-width@3.0.2:
+    resolution: {integrity: sha512-gX8LrtNEI5hq8DVUfRQMbr5lpaS4nMIWV+7XEbXk2b8kiQIizgnlr12B4dA3ZEx3308ze0O4Q1R+cHts8kyUJg==}
+
+  fast-wrap-ansi@0.2.0:
+    resolution: {integrity: sha512-rLV8JHxTyhVmFYhBJuMujcrHqOT2cnO5Zxj37qROj23CP39GXubJRBUFF0z8KFK77Uc0SukZUf7JZhsVEQ6n8w==}
+
   fsevents@2.3.3:
     resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
     engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
     os: [darwin]
 
+  get-caller-file@2.0.5:
+    resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==}
+    engines: {node: 6.* || 8.* || >= 10.*}
+
   get-func-name@2.0.2:
     resolution: {integrity: sha512-8vXOvuE167CtIc3OyItco7N/dpRtBbYOsPsXCz7X/PMnlGjYjSGuZJgM1Y7mmew7BKf9BqvLX2tnOVy1BBUsxQ==}
 
@@ -576,10 +683,24 @@ packages:
     resolution: {integrity: sha512-VaUJspBffn/LMCJVoMvSAdmscJyS1auj5Zulnn5UoYcY531UWmdwhRWkcGKnGU93m5HSXP9LP2usOryrBtQowA==}
     engines: {node: '>=16'}
 
+  graphql@16.14.0:
+    resolution: {integrity: sha512-BBvQ/406p+4CZbTpCbVPSxfzrZrbnuWSP1ELYgyS6B+hNeKzgrdB4JczCa5VZUBQrDa9hUngm0KnexY6pJRN5Q==}
+    engines: {node: ^12.22.0 || ^14.16.0 || ^16.0.0 || >=17.0.0}
+
+  headers-polyfill@5.0.1:
+    resolution: {integrity: sha512-1TJ6Fih/b8h5TIcv+1+Hw0PDQWJTKDKzFZzcKOiW1wJza3XoAQlkCuXLbymPYB8+ZQyw8mHvdw560e8zVFIWyA==}
+
   human-signals@5.0.0:
     resolution: {integrity: sha512-AXcZb6vzzrFAUE61HnN4mpLqd/cSIwNQjtNWR0euPm6y0iqx3G4gOXaIDdtdDwZmhwe82LA6+zinmW4UBWVePQ==}
     engines: {node: '>=16.17.0'}
 
+  is-fullwidth-code-point@3.0.0:
+    resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==}
+    engines: {node: '>=8'}
+
+  is-node-process@1.2.0:
+    resolution: {integrity: sha512-Vg4o6/fqPxIjtxgUH5QLJhwZ7gW5diGCVlXpuUfELC62CuxM1iHcRe51f2W1FDy04Ai4KJkagKjx3XaqyfRKXw==}
+
   is-stream@3.0.0:
     resolution: {integrity: sha512-LnQR4bZ9IADDRSkvpqMGvt/tEJWclzklNgSw48V5EAaAeDd6qGvN8ei6k5p0tvxSR171VmGyHuTiAOfxAbr8kA==}
     engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
@@ -613,6 +734,20 @@ packages:
   ms@2.1.3:
     resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
 
+  msw@2.14.6:
+    resolution: {integrity: sha512-ALe+N10S72cyx94cMcy3Zs4HhXCj35sgeAL4c+WTvKi0zWnbd8/h0lcFqv0mb2P+aSgAdD7p9HzvA0DiUPxsyg==}
+    engines: {node: '>=18'}
+    hasBin: true
+    peerDependencies:
+      typescript: '>= 4.8.x'
+    peerDependenciesMeta:
+      typescript:
+        optional: true
+
+  mute-stream@3.0.0:
+    resolution: {integrity: sha512-dkEJPVvun4FryqBmZ5KhDo0K9iDXAwn08tMLDinNdRBNPcYEDiWYysLcc6k3mjTMlbP9KyylvRpd4wFtwrT9rw==}
+    engines: {node: ^20.17.0 || >=22.9.0}
+
   nanoid@3.3.12:
     resolution: {integrity: sha512-ZB9RH/39qpq5Vu6Y+NmUaFhQR6pp+M2Xt76XBnEwDaGcVAqhlvxrl3B2bKS5D3NH3QR76v3aSrKaF/Kiy7lEtQ==}
     engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1}
@@ -626,6 +761,9 @@ packages:
     resolution: {integrity: sha512-1FlR+gjXK7X+AsAHso35MnyN5KqGwJRi/31ft6x0M194ht7S+rWAvd7PHss9xSKMzE0asv1pyIHaJYq+BbacAQ==}
     engines: {node: '>=12'}
 
+  outvariant@1.4.3:
+    resolution: {integrity: sha512-+Sl2UErvtsoajRDKCE5/dBz4DIvHXQQnAxtQTF04OJxY0+DyZXSo5P5Bb7XYWOh81syohlYL24hbDwxedPUJCA==}
+
   p-limit@5.0.0:
     resolution: {integrity: sha512-/Eaoq+QyLSiXQ4lyYV23f14mZRQcXnxfHrN0vCai+ak9G0pp9iEQukIIZq5NccEvwRB8PUnZT0KsOoDCINS1qQ==}
     engines: {node: '>=18'}
@@ -638,6 +776,9 @@ packages:
     resolution: {integrity: sha512-haREypq7xkM7ErfgIyA0z+Bj4AGKlMSdlQE2jvJo6huWD1EdkKYV+G/T4nq0YEF2vgTT8kqMFKo1uHn950r4SQ==}
     engines: {node: '>=12'}
 
+  path-to-regexp@6.3.0:
+    resolution: {integrity: sha512-Yhpw4T9C6hPpgPeA28us07OJeqZ5EzQTkbfwuhsUg0c237RomFoETJgmp2sa3F/41gfLE6G5cqcYwznmeEeOlQ==}
+
   pathe@1.1.2:
     resolution: {integrity: sha512-whLdWMYL2TwI08hn8/ZqAbrVemu0LNaNNJZX73O6qaIdCTfXutsLhMkjdENX0qhsQ9uIimo4/aQOmXkoon2nDQ==}
 
@@ -664,11 +805,21 @@ packages:
   react-is@18.3.1:
     resolution: {integrity: sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==}
 
+  require-directory@2.1.1:
+    resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==}
+    engines: {node: '>=0.10.0'}
+
+  rettime@0.11.11:
+    resolution: {integrity: sha512-ILJRqVWBCTlg9r42fFgwVZx1gnFAcQF8mRoMkbgQfIrjEDf9nbBFDFx00oloOa+Q869FUtaYDXZvEfnecQSCoQ==}
+
   rollup@4.60.4:
     resolution: {integrity: sha512-WHeFSbZYsPu3+bLoNRUuAO+wavNlocOPf3wSHTP7hcFKVnJeWsYlCDbr3mTS14FCizf9ccIxXA8sGL8zKeQN3g==}
     engines: {node: '>=18.0.0', npm: '>=8.0.0'}
     hasBin: true
 
+  set-cookie-parser@3.1.0:
+    resolution: {integrity: sha512-kjnC1DXBHcxaOaOXBHBeRtltsDG2nUiUni+jP92M9gYdW12rsmx92UsfpH7o5tDRs7I1ZZPSQJQGv3UaRfCiuw==}
+
   shebang-command@2.0.0:
     resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==}
     engines: {node: '>=8'}
@@ -691,9 +842,24 @@ packages:
   stackback@0.0.2:
     resolution: {integrity: sha512-1XMJE5fQo1jGH6Y/7ebnwPOBEkIEnT4QF32d5R1+VXdXveM0IBMJt8zfaxX1P3QhVwrYe+576+jkANtSS2mBbw==}
 
+  statuses@2.0.2:
+    resolution: {integrity: sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==}
+    engines: {node: '>= 0.8'}
+
   std-env@3.10.0:
     resolution: {integrity: sha512-5GS12FdOZNliM5mAOxFRg7Ir0pWz8MdpYm6AY6VPkGpbA7ZzmbzNcBJQ0GPvvyWgcY7QAhCgf9Uy89I03faLkg==}
 
+  strict-event-emitter@0.5.1:
+    resolution: {integrity: sha512-vMgjE/GGEPEFnhFub6pa4FmJBRBVOLpIII2hvCZ8Kzb7K0hlHo7mQv6xYrBvCL2LtAIBwFUK8wvuJgTVSQ5MFQ==}
+
+  string-width@4.2.3:
+    resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==}
+    engines: {node: '>=8'}
+
+  strip-ansi@6.0.1:
+    resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==}
+    engines: {node: '>=8'}
+
   strip-final-newline@3.0.0:
     resolution: {integrity: sha512-dOESqjYr96iWYylGObzd39EuNTa5VJxyvVAEm5Jnh7KGo75V43Hk1odPQkNDyXNmUR6k+gEiDVXnjB8HJ3crXw==}
     engines: {node: '>=12'}
@@ -701,6 +867,10 @@ packages:
   strip-literal@2.1.1:
     resolution: {integrity: sha512-631UJ6O00eNGfMiWG78ck80dfBab8X6IVFB51jZK5Icd7XAs60Z5y7QdSd/wGIklnWvRbUNloVzhOKKmutxQ6Q==}
 
+  tagged-tag@1.0.0:
+    resolution: {integrity: sha512-yEFYrVhod+hdNyx7g5Bnkkb0G6si8HJurOoOEgC8B/O0uXLHlaey/65KRv6cuWBNhBgHKAROVpc7QyYqE5gFng==}
+    engines: {node: '>=20'}
+
   tinybench@2.9.0:
     resolution: {integrity: sha512-0+DUvqWMValLmha6lr4kD8iAMK1HzV0/aKnCtWb9v9641TnP/MFb7Pc2bxoxQjTXAErryXVgUOfv2YqNllqGeg==}
 
@@ -712,6 +882,17 @@ packages:
     resolution: {integrity: sha512-KYad6Vy5VDWV4GH3fjpseMQ/XU2BhIYP7Vzd0LG44qRWm/Yt2WCOTicFdvmgo6gWaqooMQCawTtILVQJupKu7A==}
     engines: {node: '>=14.0.0'}
 
+  tldts-core@7.0.30:
+    resolution: {integrity: sha512-uiHN8PIB1VmWyS98eZYja4xzlYqeFZVjb4OuYlJQnZAuJhMw4PbKQOKgHKhBdJR3FE/t5mUQ1Kd80++B+qhD1Q==}
+
+  tldts@7.0.30:
+    resolution: {integrity: sha512-ELrFxuqsDdHUwoh0XxDbxuLD3Wnz49Z57IFvTtvWy1hJdcMZjXLIuonjilCiWHlT2GbE4Wlv1wKVTzDFnXH1aw==}
+    hasBin: true
+
+  tough-cookie@6.0.1:
+    resolution: {integrity: sha512-LktZQb3IeoUWB9lqR5EWTHgW/VTITCXg4D21M+lvybRVdylLrRMnqaIONLVb5mav8vM19m44HIcGq4qASeu2Qw==}
+    engines: {node: '>=16'}
+
   tsx@4.22.1:
     resolution: {integrity: sha512-TvncJykhxAzFCk0VQZKBTClall4Pm7qXDSodb6uxi8QFa8X8mT6ABjxxsQ2opDRYxG7AzcRWXaFtruz5HJKuWg==}
     engines: {node: '>=18.0.0'}
@@ -721,6 +902,10 @@ packages:
     resolution: {integrity: sha512-Acylog8/luQ8L7il+geoSxhEkazvkslg7PSNKOX59mbB9cOveP5aq9h74Y7YU8yDpJwetzQQrfIwtf4Wp4LKcw==}
     engines: {node: '>=4'}
 
+  type-fest@5.6.0:
+    resolution: {integrity: sha512-8ZiHFm91orbSAe2PSAiSVBVko18pbhbiB3U9GglSzF/zCGkR+rxpHx6sEMCUm4kxY4LjDIUGgCfUMtwfZfjfUA==}
+    engines: {node: '>=20'}
+
   typescript@5.9.3:
     resolution: {integrity: sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==}
     engines: {node: '>=14.17'}
@@ -732,6 +917,9 @@ packages:
   undici-types@6.21.0:
     resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
 
+  until-async@3.0.2:
+    resolution: {integrity: sha512-IiSk4HlzAMqTUseHHe3VhIGyuFmN90zMTpD3Z3y8jeQbzLIq500MVM7Jq2vUAnTKAFPJrqwkzr6PoTcPhGcOiw==}
+
   vite-node@1.6.1:
     resolution: {integrity: sha512-YAXkfvGtuTzwWbDSACdJSg4A4DZiAqckWe90Zapc/sEX3XvHcw1NdurM/6od8J207tSDqNbSsgdCacBgvJKFuA==}
     engines: {node: ^18.0.0 || >=20.0.0}
@@ -803,6 +991,22 @@ packages:
     engines: {node: '>=8'}
     hasBin: true
 
+  wrap-ansi@7.0.0:
+    resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==}
+    engines: {node: '>=10'}
+
+  y18n@5.0.8:
+    resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==}
+    engines: {node: '>=10'}
+
+  yargs-parser@21.1.1:
+    resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==}
+    engines: {node: '>=12'}
+
+  yargs@17.7.2:
+    resolution: {integrity: sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==}
+    engines: {node: '>=12'}
+
   yocto-queue@1.2.2:
     resolution: {integrity: sha512-4LCcse/U2MHZ63HAJVE+v71o7yOdIe4cZ70Wpf8D/IyjDKYQLV5GD46B+hSTjJsvV5PztjvHoU580EftxjDZFQ==}
     engines: {node: '>=12.20'}
@@ -958,12 +1162,59 @@ snapshots:
 
   '@faker-js/faker@9.9.0': {}
 
+  '@inquirer/ansi@2.0.5': {}
+
+  '@inquirer/confirm@6.0.13(@types/node@20.19.41)':
+    dependencies:
+      '@inquirer/core': 11.1.10(@types/node@20.19.41)
+      '@inquirer/type': 4.0.5(@types/node@20.19.41)
+    optionalDependencies:
+      '@types/node': 20.19.41
+
+  '@inquirer/core@11.1.10(@types/node@20.19.41)':
+    dependencies:
+      '@inquirer/ansi': 2.0.5
+      '@inquirer/figures': 2.0.5
+      '@inquirer/type': 4.0.5(@types/node@20.19.41)
+      cli-width: 4.1.0
+      fast-wrap-ansi: 0.2.0
+      mute-stream: 3.0.0
+      signal-exit: 4.1.0
+    optionalDependencies:
+      '@types/node': 20.19.41
+
+  '@inquirer/figures@2.0.5': {}
+
+  '@inquirer/type@4.0.5(@types/node@20.19.41)':
+    optionalDependencies:
+      '@types/node': 20.19.41
+
   '@jest/schemas@29.6.3':
     dependencies:
       '@sinclair/typebox': 0.27.10
 
   '@jridgewell/sourcemap-codec@1.5.5': {}
 
+  '@mswjs/interceptors@0.41.9':
+    dependencies:
+      '@open-draft/deferred-promise': 2.2.0
+      '@open-draft/logger': 0.3.0
+      '@open-draft/until': 2.1.0
+      is-node-process: 1.2.0
+      outvariant: 1.4.3
+      strict-event-emitter: 0.5.1
+
+  '@open-draft/deferred-promise@2.2.0': {}
+
+  '@open-draft/deferred-promise@3.0.0': {}
+
+  '@open-draft/logger@0.3.0':
+    dependencies:
+      is-node-process: 1.2.0
+      outvariant: 1.4.3
+
+  '@open-draft/until@2.1.0': {}
+
   '@rollup/rollup-android-arm-eabi@4.60.4':
     optional: true
 
@@ -1049,6 +1300,12 @@ snapshots:
     dependencies:
       undici-types: 6.21.0
 
+  '@types/set-cookie-parser@2.4.10':
+    dependencies:
+      '@types/node': 20.19.41
+
+  '@types/statuses@2.0.6': {}
+
   '@vitest/expect@1.6.1':
     dependencies:
       '@vitest/spy': 1.6.1
@@ -1084,6 +1341,12 @@ snapshots:
 
   acorn@8.16.0: {}
 
+  ansi-regex@5.0.1: {}
+
+  ansi-styles@4.3.0:
+    dependencies:
+      color-convert: 2.0.1
+
   ansi-styles@5.2.0: {}
 
   assertion-error@1.1.0: {}
@@ -1104,8 +1367,24 @@ snapshots:
     dependencies:
       get-func-name: 2.0.2
 
+  cli-width@4.1.0: {}
+
+  cliui@8.0.1:
+    dependencies:
+      string-width: 4.2.3
+      strip-ansi: 6.0.1
+      wrap-ansi: 7.0.0
+
+  color-convert@2.0.1:
+    dependencies:
+      color-name: 1.1.4
+
+  color-name@1.1.4: {}
+
   confbox@0.1.8: {}
 
+  cookie@1.1.1: {}
+
   cross-spawn@7.0.6:
     dependencies:
       path-key: 3.1.1
@@ -1122,6 +1401,8 @@ snapshots:
 
   diff-sequences@29.6.3: {}
 
+  emoji-regex@8.0.0: {}
+
   esbuild@0.21.5:
     optionalDependencies:
       '@esbuild/aix-ppc64': 0.21.5
@@ -1177,6 +1458,8 @@ snapshots:
       '@esbuild/win32-ia32': 0.28.0
       '@esbuild/win32-x64': 0.28.0
 
+  escalade@3.2.0: {}
+
   estree-walker@3.0.3:
     dependencies:
       '@types/estree': 1.0.9
@@ -1193,15 +1476,38 @@ snapshots:
       signal-exit: 4.1.0
       strip-final-newline: 3.0.0
 
+  fast-string-truncated-width@3.0.3: {}
+
+  fast-string-width@3.0.2:
+    dependencies:
+      fast-string-truncated-width: 3.0.3
+
+  fast-wrap-ansi@0.2.0:
+    dependencies:
+      fast-string-width: 3.0.2
+
   fsevents@2.3.3:
     optional: true
 
+  get-caller-file@2.0.5: {}
+
   get-func-name@2.0.2: {}
 
   get-stream@8.0.1: {}
 
+  graphql@16.14.0: {}
+
+  headers-polyfill@5.0.1:
+    dependencies:
+      '@types/set-cookie-parser': 2.4.10
+      set-cookie-parser: 3.1.0
+
   human-signals@5.0.0: {}
 
+  is-fullwidth-code-point@3.0.0: {}
+
+  is-node-process@1.2.0: {}
+
   is-stream@3.0.0: {}
 
   isexe@2.0.0: {}
@@ -1234,6 +1540,33 @@ snapshots:
 
   ms@2.1.3: {}
 
+  msw@2.14.6(@types/node@20.19.41)(typescript@5.9.3):
+    dependencies:
+      '@inquirer/confirm': 6.0.13(@types/node@20.19.41)
+      '@mswjs/interceptors': 0.41.9
+      '@open-draft/deferred-promise': 3.0.0
+      '@types/statuses': 2.0.6
+      cookie: 1.1.1
+      graphql: 16.14.0
+      headers-polyfill: 5.0.1
+      is-node-process: 1.2.0
+      outvariant: 1.4.3
+      path-to-regexp: 6.3.0
+      picocolors: 1.1.1
+      rettime: 0.11.11
+      statuses: 2.0.2
+      strict-event-emitter: 0.5.1
+      tough-cookie: 6.0.1
+      type-fest: 5.6.0
+      until-async: 3.0.2
+      yargs: 17.7.2
+    optionalDependencies:
+      typescript: 5.9.3
+    transitivePeerDependencies:
+      - '@types/node'
+
+  mute-stream@3.0.0: {}
+
   nanoid@3.3.12: {}
 
   npm-run-path@5.3.0:
@@ -1244,6 +1577,8 @@ snapshots:
     dependencies:
       mimic-fn: 4.0.0
 
+  outvariant@1.4.3: {}
+
   p-limit@5.0.0:
     dependencies:
       yocto-queue: 1.2.2
@@ -1252,6 +1587,8 @@ snapshots:
 
   path-key@4.0.0: {}
 
+  path-to-regexp@6.3.0: {}
+
   pathe@1.1.2: {}
 
   pathe@2.0.3: {}
@@ -1280,6 +1617,10 @@ snapshots:
 
   react-is@18.3.1: {}
 
+  require-directory@2.1.1: {}
+
+  rettime@0.11.11: {}
+
   rollup@4.60.4:
     dependencies:
       '@types/estree': 1.0.8
@@ -1311,6 +1652,8 @@ snapshots:
       '@rollup/rollup-win32-x64-msvc': 4.60.4
       fsevents: 2.3.3
 
+  set-cookie-parser@3.1.0: {}
+
   shebang-command@2.0.0:
     dependencies:
       shebang-regex: 3.0.0
@@ -1325,20 +1668,46 @@ snapshots:
 
   stackback@0.0.2: {}
 
+  statuses@2.0.2: {}
+
   std-env@3.10.0: {}
 
+  strict-event-emitter@0.5.1: {}
+
+  string-width@4.2.3:
+    dependencies:
+      emoji-regex: 8.0.0
+      is-fullwidth-code-point: 3.0.0
+      strip-ansi: 6.0.1
+
+  strip-ansi@6.0.1:
+    dependencies:
+      ansi-regex: 5.0.1
+
   strip-final-newline@3.0.0: {}
 
   strip-literal@2.1.1:
     dependencies:
       js-tokens: 9.0.1
 
+  tagged-tag@1.0.0: {}
+
   tinybench@2.9.0: {}
 
   tinypool@0.8.4: {}
 
   tinyspy@2.2.1: {}
 
+  tldts-core@7.0.30: {}
+
+  tldts@7.0.30:
+    dependencies:
+      tldts-core: 7.0.30
+
+  tough-cookie@6.0.1:
+    dependencies:
+      tldts: 7.0.30
+
   tsx@4.22.1:
     dependencies:
       esbuild: 0.28.0
@@ -1347,12 +1716,18 @@ snapshots:
 
   type-detect@4.1.0: {}
 
+  type-fest@5.6.0:
+    dependencies:
+      tagged-tag: 1.0.0
+
   typescript@5.9.3: {}
 
   ufo@1.6.4: {}
 
   undici-types@6.21.0: {}
 
+  until-async@3.0.2: {}
+
   vite-node@1.6.1(@types/node@20.19.41):
     dependencies:
       cac: 6.7.14
@@ -1423,4 +1798,24 @@ snapshots:
       siginfo: 2.0.0
       stackback: 0.0.2
 
+  wrap-ansi@7.0.0:
+    dependencies:
+      ansi-styles: 4.3.0
+      string-width: 4.2.3
+      strip-ansi: 6.0.1
+
+  y18n@5.0.8: {}
+
+  yargs-parser@21.1.1: {}
+
+  yargs@17.7.2:
+    dependencies:
+      cliui: 8.0.1
+      escalade: 3.2.0
+      get-caller-file: 2.0.5
+      require-directory: 2.1.1
+      string-width: 4.2.3
+      y18n: 5.0.8
+      yargs-parser: 21.1.1
+
   yocto-queue@1.2.2: {}
diff --git a/src/gateway-client.ts b/src/gateway-client.ts
new file mode 100644
index 0000000..5dc8c11
--- /dev/null
+++ b/src/gateway-client.ts
@@ -0,0 +1,391 @@
+/**
+ * gateway-client.ts
+ *
+ * Typed wrapper around the Lucairn gateway's proving-ground proxy endpoint
+ * (`POST /api/v1/proxy/messages` with `mode: "proving_ground"`).
+ *
+ * Why this endpoint:
+ *   The proving-ground mode is the ONLY inline gateway surface that returns
+ *   per-entity matching evidence (matches / missed / extras keyed by the
+ *   caller-supplied annotation type) in the same HTTP response. The
+ *   alternative inline surfaces — `/v1/messages` and the public-summary
+ *   endpoint — emit only aggregate redaction counts and explicitly omit
+ *   per-entity fields for privacy reasons. References:
+ *     - dual-sandbox-architecture/services/gateway/internal/api/proxy.go:35-58
+ *       (proxyPIIAnnotation + proxyRequest schemas)
+ *     - dual-sandbox-architecture/services/gateway/internal/api/proxy.go:361-373
+ *       (mode validation, ActivityID + GroundTruth requirements)
+ *     - dual-sandbox-architecture/services/gateway/internal/api/proxy.go:1068-1080
+ *       (ground_truth_evaluation field emission)
+ *     - dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:5-138
+ *       (groundTruthResult + per-item shapes)
+ *
+ * The retry policy is 2 retries with exponential backoff (base 500 ms, jitter
+ * 0–200 ms) on 5xx and connection errors only. 4xx errors are surfaced
+ * without retry. The per-request timeout defaults to 30 s and is configurable
+ * via LUCAIRN_REQUEST_TIMEOUT_MS.
+ *
+ * No real secret material is referenced at import time — the environment is
+ * read only when readGatewayEnv() is called, and makeGatewayClient() takes the
+ * URL + key as explicit options. Tests run with msw and synthetic URLs / keys.
+ */
+
+import type { HipaaCategory, InjectedEntity } from './inject-pii-core.js';
+
+/**
+ * The annotation we send to the gateway as ground truth. `type` carries the
+ * HIPAA Safe Harbor category verbatim, so the gateway echoes it back in
+ * `matches[].annotation_type` and `missed[].type` and we can aggregate
+ * directly without a second mapping pass.
+ */
+export interface ProvingGroundAnnotation {
+  readonly type: HipaaCategory;
+  readonly value: string;
+  readonly start: number;
+  readonly end: number;
+}
+
+/**
+ * The per-row request body emitted to `POST /api/v1/proxy/messages`. The
+ * prompt template + context fields are minimal because Paper 1's measurement
+ * is upstream of inference — we are measuring sanitizer recall against
+ * known-injected PHI, not LLM behaviour. The single context field
+ * `transcription` carries the row text; the prompt template trivially echoes
+ * it back so the inference call completes.
+ */
+export interface GatewayRequestBody {
+  readonly prompt_template: string;
+  readonly context: Readonly<Record<string, string>>;
+  readonly mode: 'proving_ground';
+  readonly activity_id: string;
+  readonly ground_truth: Readonly<Record<string, readonly ProvingGroundAnnotation[]>>;
+  readonly relink_response: false;
+  readonly model?: string;
+  readonly max_tokens?: number;
+}
+
+/**
+ * Mirrors `groundTruthMatch` in
+ *   dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:20-24
+ */
+export interface GroundTruthMatch {
+  readonly annotation_type: string;
+  readonly annotation_value: string;
+  readonly redacted_as: string;
+}
+
+/**
+ * Mirrors `groundTruthMiss` in
+ *   dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:26-30
+ */
+export interface GroundTruthMiss {
+  readonly field: string;
+  readonly type: string;
+  readonly value: string;
+}
+
+/**
+ * Mirrors `groundTruthExtra` in
+ *   dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:32-35
+ */
+export interface GroundTruthExtra {
+  readonly placeholder: string;
+  readonly original: string;
+}
+
+/**
+ * Mirrors `groundTruthResult` in
+ *   dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:7-18
+ */
+export interface GroundTruthEvaluation {
+  readonly total_annotations: number;
+  readonly true_positives: number;
+  readonly false_negatives: number;
+  readonly false_positives: number;
+  readonly detection_rate: number;
+  readonly matches?: readonly GroundTruthMatch[];
+  readonly missed?: readonly GroundTruthMiss[];
+  readonly extras?: readonly GroundTruthExtra[];
+}
+
+/**
+ * Subset of the gateway proxy response that the harness reads. The full
+ * response also includes `result`, `model_used`, `dlp_redacted`,
+ * `compliance_trace`, etc. — those are surfaced verbatim in the raw NDJSON
+ * for downstream auditability but the harness reads only what's needed for
+ * cert collection + recall computation.
+ *
+ * `veil` is the Pro/Enterprise hint emitted at
+ *   dual-sandbox-architecture/services/gateway/internal/api/proxy.go:1088-1094
+ */
+export interface VeilHint {
+  readonly status: string;
+  readonly certificate_url: string;
+  readonly summary_url: string;
+}
+
+export interface GatewayResponse {
+  readonly request_id: string;
+  readonly status?: string;
+  readonly latency_ms?: number;
+  readonly redaction_count?: number;
+  readonly ground_truth_evaluation?: GroundTruthEvaluation;
+  readonly veil?: VeilHint;
+  // Free-form passthrough for the raw NDJSON dump — typed loosely so the
+  // harness never silently drops fields that the gateway adds later.
+  readonly [extra: string]: unknown;
+}
+
+export interface GatewayRowInput {
+  readonly row_index: number;
+  readonly transcription: string;
+  readonly entities: readonly InjectedEntity[];
+}
+
+export interface GatewayRowResult {
+  readonly row_index: number;
+  readonly request_id: string;
+  readonly cert_url: string | null;
+  readonly summary_url: string | null;
+  readonly evaluation: GroundTruthEvaluation | null;
+  readonly redaction_count: number | null;
+  readonly latency_ms: number | null;
+  readonly raw_response: GatewayResponse;
+}
+
+export interface GatewayClientOptions {
+  readonly gatewayUrl: string;
+  readonly apiKey: string;
+  readonly activityIdPrefix?: string;
+  readonly requestTimeoutMs?: number;
+  readonly maxRetries?: number;
+  readonly backoffBaseMs?: number;
+  readonly backoffJitterMs?: number;
+  readonly fetchFn?: typeof fetch;
+  readonly sleepFn?: (ms: number) => Promise<void>;
+  readonly randomFn?: () => number;
+  readonly model?: string;
+  readonly maxTokens?: number;
+}
+
+export interface GatewayClient {
+  runRow(row: GatewayRowInput): Promise<GatewayRowResult>;
+}
+
+export class GatewayClientError extends Error {
+  public readonly status: number | null;
+  public readonly responseBody: string | null;
+
+  constructor(message: string, status: number | null, responseBody: string | null) {
+    super(message);
+    this.name = 'GatewayClientError';
+    this.status = status;
+    this.responseBody = responseBody;
+  }
+}
+
+const DEFAULT_TIMEOUT_MS = 30_000;
+const DEFAULT_MAX_RETRIES = 2;
+const DEFAULT_BACKOFF_BASE_MS = 500;
+const DEFAULT_BACKOFF_JITTER_MS = 200;
+const DEFAULT_MODEL = 'claude-sonnet-4-6';
+const DEFAULT_MAX_TOKENS = 64;
+
+function defaultSleep(ms: number): Promise<void> {
+  // Real-time delay used when no `sleepFn` is injected: the promise settles
+  // once `ms` milliseconds have elapsed on the host timer.
+  return new Promise<void>((wake) => void setTimeout(wake, ms));
+}
+
+/**
+ * Construct an annotation list suitable for the proving-ground ground_truth
+ * field. The keying field name is fixed at `transcription` because that is
+ * the single context field we route through the sanitizer.
+ */
+function buildGroundTruth(
+  entities: readonly InjectedEntity[],
+): Record<string, ProvingGroundAnnotation[]> {
+  // Project each injected entity onto the wire-level annotation shape,
+  // preserving input order.
+  const annotations: ProvingGroundAnnotation[] = [];
+  for (const { category, value, start_char, end_char } of entities) {
+    annotations.push({ type: category, value, start: start_char, end: end_char });
+  }
+  // `transcription` is the only context field routed through the sanitizer.
+  return { transcription: annotations };
+}
+
+/**
+ * Pure helper: extract the cert URL + summary URL from a gateway response.
+ * Exposed for the collect-certs script + unit testing without mounting a
+ * full client.
+ */
+export function extractCertUrls(response: GatewayResponse): {
+  cert_url: string | null;
+  summary_url: string | null;
+} {
+  // The `veil` hint is optional; without it there are no URLs to report.
+  const hint = response.veil;
+  // A hint may still omit either URL at runtime, so each falls back to null.
+  const cert_url = hint ? (hint.certificate_url ?? null) : null;
+  const summary_url = hint ? (hint.summary_url ?? null) : null;
+  return { cert_url, summary_url };
+}
+
+/**
+ * Build a client whose `runRow` POSTs one row to the proving-ground endpoint.
+ * 5xx and connection/abort failures are retried (maxRetries, jittered
+ * exponential backoff); 4xx and exhausted retries reject with GatewayClientError.
+ */
+export function makeGatewayClient(options: GatewayClientOptions): GatewayClient {
+  if (options.gatewayUrl === '') {
+    throw new GatewayClientError(
+      'gatewayUrl is required (or set LUCAIRN_GATEWAY_URL)',
+      null,
+      null,
+    );
+  }
+  if (options.apiKey === '') {
+    throw new GatewayClientError('apiKey is required (or set LUCAIRN_API_KEY)', null, null);
+  }
+  const fetchFn = options.fetchFn ?? fetch;
+  const sleepFn = options.sleepFn ?? defaultSleep;
+  const randomFn = options.randomFn ?? Math.random;
+  const timeoutMs = options.requestTimeoutMs ?? DEFAULT_TIMEOUT_MS;
+  const maxRetries = options.maxRetries ?? DEFAULT_MAX_RETRIES;
+  const backoffBase = options.backoffBaseMs ?? DEFAULT_BACKOFF_BASE_MS;
+  const backoffJitter = options.backoffJitterMs ?? DEFAULT_BACKOFF_JITTER_MS;
+  const activityPrefix = options.activityIdPrefix ?? 'paper-1-healthcare';
+  const model = options.model ?? DEFAULT_MODEL;
+  const maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
+  const endpoint = `${options.gatewayUrl.replace(/\/+$/u, '')}/api/v1/proxy/messages`;
+
+  async function runRow(row: GatewayRowInput): Promise<GatewayRowResult> {
+    const body: GatewayRequestBody = {
+      prompt_template:
+        'Echo the transcription back verbatim. Make no inferences. Transcription: {transcription}',
+      context: { transcription: row.transcription },
+      mode: 'proving_ground',
+      activity_id: `${activityPrefix}-row-${row.row_index}`,
+      ground_truth: buildGroundTruth(row.entities),
+      relink_response: false,
+      model,
+      max_tokens: maxTokens,
+    };
+
+    let attempt = 0;
+    // The retry budget is maxRetries + 1 (the initial attempt + retries).
+    while (true) {
+      attempt += 1;
+      const controller = new AbortController();
+      // Keep the timer armed until the body is fully read: fetch resolves on
+      // headers, so clearing it earlier would let a stalled body hang forever.
+      const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);
+      try {
+        const response = await fetchFn(endpoint, {
+          method: 'POST',
+          headers: {
+            'content-type': 'application/json',
+            'x-api-key': options.apiKey,
+          },
+          body: JSON.stringify(body),
+          signal: controller.signal,
+        });
+        if (response.status >= 500) {
+          // Retry-eligible.
+          const text = await safeReadText(response);
+          if (attempt > maxRetries) {
+            throw new GatewayClientError(
+              `gateway 5xx after ${attempt - 1} retries (status ${response.status})`,
+              response.status,
+              text,
+            );
+          }
+          clearTimeout(timeoutHandle); // body consumed; nothing left to abort during backoff
+          await sleepFn(computeBackoffMs(attempt, backoffBase, backoffJitter, randomFn));
+          continue;
+        }
+        if (response.status >= 400) {
+          // 4xx is terminal — surface immediately, no retry.
+          const text = await safeReadText(response);
+          throw new GatewayClientError(
+            `gateway 4xx (status ${response.status})`,
+            response.status,
+            text,
+          );
+        }
+        const raw = (await response.json()) as GatewayResponse;
+        const urls = extractCertUrls(raw);
+        return {
+          row_index: row.row_index,
+          request_id: raw.request_id ?? '',
+          cert_url: urls.cert_url,
+          summary_url: urls.summary_url,
+          evaluation: raw.ground_truth_evaluation ?? null,
+          redaction_count: typeof raw.redaction_count === 'number' ? raw.redaction_count : null,
+          latency_ms: typeof raw.latency_ms === 'number' ? raw.latency_ms : null,
+          raw_response: raw,
+        };
+      } catch (err) {
+        clearTimeout(timeoutHandle);
+        // Terminal — already classified.
+        if (err instanceof GatewayClientError) throw err;
+        // Connection / abort / unknown error → retry budget applies.
+        if (attempt > maxRetries) {
+          const reason = err instanceof Error ? err.message : String(err);
+          throw new GatewayClientError(
+            `gateway connection error after ${attempt - 1} retries: ${reason}`,
+            null,
+            null,
+          );
+        }
+        await sleepFn(computeBackoffMs(attempt, backoffBase, backoffJitter, randomFn));
+      } finally {
+        clearTimeout(timeoutHandle); // idempotent; covers the success return and every throw
+      }
+    }
+  }
+
+  return { runRow };
+}
+
+function computeBackoffMs(
+  attempt: number,
+  baseMs: number,
+  jitterMs: number,
+  randomFn: () => number,
+): number {
+  // Delay doubles per attempt (attempt 1 waits baseMs) plus a random slice of
+  // jitterMs; the sum is floored to whole milliseconds.
+  return Math.floor(baseMs * Math.pow(2, attempt - 1) + randomFn() * jitterMs);
+}
+
+async function safeReadText(response: Response): Promise<string | null> {
+  try {
+    return await response.text();
+  } catch {
+    return null;
+  }
+}
+
+/**
+ * Read gateway URL, API key and request timeout from the environment.
+ * Unset variables come back as null so callers can choose mock mode or
+ * failure. LUCAIRN_REQUEST_TIMEOUT_MS must be a positive whole number of
+ * milliseconds written in plain digits; anything else (`30s`, `1e4`, `2.5`)
+ * is ignored rather than truncated to a much shorter timeout.
+ */
+export function readGatewayEnv(env: NodeJS.ProcessEnv = process.env): {
+  gatewayUrl: string | null;
+  apiKey: string | null;
+  requestTimeoutMs: number | null;
+} {
+  const timeoutStr = (env.LUCAIRN_REQUEST_TIMEOUT_MS ?? '').trim();
+  // parseInt alone accepts a numeric prefix, so `30s` would become 30 ms.
+  const parsed = /^\d+$/u.test(timeoutStr) ? Number.parseInt(timeoutStr, 10) : Number.NaN;
+  return {
+    gatewayUrl: env.LUCAIRN_GATEWAY_URL ?? null,
+    apiKey: env.LUCAIRN_API_KEY ?? null,
+    requestTimeoutMs: Number.isSafeInteger(parsed) && parsed > 0 ? parsed : null,
+  };
+}
diff --git a/src/hipaa-category-mapping.ts b/src/hipaa-category-mapping.ts
new file mode 100644
index 0000000..be47108
--- /dev/null
+++ b/src/hipaa-category-mapping.ts
@@ -0,0 +1,146 @@
+/**
+ * hipaa-category-mapping.ts
+ *
+ * Maps Lucairn's internal sanitizer placeholder types (the `[TYPE_N]` shape)
+ * back to the 18 HIPAA Safe Harbor categories defined in
+ * `src/inject-pii-core.ts:28-47` (45 CFR § 164.514(b)(2)(i)).
+ *
+ * Why this exists:
+ *   The Lucairn sanitizer emits redactions whose `placeholder` field is of the
+ *   form `[TYPE_N]` where TYPE is an internal taxonomy term (PERSON, LOCATION,
+ *   PHONE_NUMBER, etc.). The HIPAA Safe Harbor enumeration is the standard the
+ *   research program reports recall against. This module is the documented
+ *   bridge between the two taxonomies.
+ *
+ * Cite-back: gateway emits `placeholder` per redaction at
+ *   `dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:48-56`
+ * and the placeholder parsing convention at
+ *   `dual-sandbox-architecture/services/gateway/internal/api/proxy.go:1361-1395`
+ * (extractEntityTypes — accepts `[TYPE_N]` where TYPE is one or more
+ * uppercase letters/underscores).
+ *
+ * The mapping is intentionally explicit and one-way (internal → HIPAA). If
+ * Lucairn introduces a new sanitizer type, this table MUST be extended before
+ * Paper 1 numbers are re-published — an unmapped placeholder is a recall
+ * accounting gap, not a silent passthrough.
+ */
+
+import type { HipaaCategory } from './inject-pii-core.js';
+
+/**
+ * The exhaustive mapping from Lucairn internal sanitizer types to HIPAA Safe
+ * Harbor categories.
+ *
+ * Sources for the right-hand-side category assignments:
+ *  - 45 CFR § 164.514(b)(2)(i) Safe Harbor enumeration (the 18 categories
+ *    listed in `src/inject-pii-core.ts:28-47`).
+ *  - Lucairn sanitizer's internal type vocabulary as observed in the gateway
+ *    `extractEntityTypes` logic (`proxy.go:1361-1395`) and the Presidio +
+ *    custom recognizer catalogue.
+ *
+ * Categories not currently emitted by the sanitizer (e.g. FACE_PHOTO_REF,
+ * BIOMETRIC_ID) keep identity entries in this map so a future recognizer
+ * maps cleanly; until then they appear in injected ground truth only and
+ * show as false-negatives, which is correct accounting.
+ */
+export const LUCAIRN_TO_HIPAA: Readonly<Record<string, HipaaCategory>> = Object.freeze({
+  // Name-bearing types
+  PERSON: 'NAME',
+  PERSON_NAME: 'NAME',
+  NAME: 'NAME',
+
+  // Geographic subdivisions
+  LOCATION: 'GEO_SUBDIVISION',
+  ADDRESS: 'GEO_SUBDIVISION',
+  STREET_ADDRESS: 'GEO_SUBDIVISION',
+  ZIP_CODE: 'GEO_SUBDIVISION',
+  GERMAN_ZIP_CODE: 'GEO_SUBDIVISION',
+  CITY: 'GEO_SUBDIVISION',
+
+  // Dates
+  DATE: 'DATE',
+  DATE_TIME: 'DATE',
+
+  // Telephone / fax — sanitizer does not natively distinguish PHONE from FAX.
+  // We map both PHONE_NUMBER and PHONE to PHONE; FAX is only recognised when
+  // a custom recognizer surfaces FAX explicitly.
+  PHONE_NUMBER: 'PHONE',
+  PHONE: 'PHONE',
+  FAX: 'FAX',
+  FAX_NUMBER: 'FAX',
+
+  // Email
+  EMAIL: 'EMAIL',
+  EMAIL_ADDRESS: 'EMAIL',
+
+  // US identifier-shaped categories
+  US_SSN: 'SSN',
+  SSN: 'SSN',
+
+  // Medical record / health-plan / account / license / vehicle / device
+  MRN: 'MRN',
+  MEDICAL_RECORD_NUMBER: 'MRN',
+  HEALTH_PLAN_ID: 'HEALTH_PLAN_ID',
+  HEALTH_PLAN_BENEFICIARY_NUMBER: 'HEALTH_PLAN_ID',
+  ACCOUNT_NUMBER: 'ACCOUNT_NUMBER',
+  US_BANK_NUMBER: 'ACCOUNT_NUMBER',
+  IBAN: 'ACCOUNT_NUMBER',
+  IBAN_CODE: 'ACCOUNT_NUMBER',
+  CREDIT_CARD: 'ACCOUNT_NUMBER',
+  CREDIT_CARD_NUMBER: 'ACCOUNT_NUMBER',
+  LICENSE_NUMBER: 'LICENSE_NUMBER',
+  US_DRIVER_LICENSE: 'LICENSE_NUMBER',
+  PROFESSIONAL_LICENSE: 'LICENSE_NUMBER',
+  VEHICLE_ID: 'VEHICLE_ID',
+  VIN: 'VEHICLE_ID',
+  US_VEHICLE_VIN: 'VEHICLE_ID',
+  LICENSE_PLATE: 'VEHICLE_ID',
+  DEVICE_ID: 'DEVICE_ID',
+  DEVICE_SERIAL: 'DEVICE_ID',
+  IMEI: 'DEVICE_ID',
+
+  // Web identifiers
+  URL: 'URL',
+  IP_ADDRESS: 'IP_ADDRESS',
+
+  // Biometric / face photo / other unique ID
+  BIOMETRIC_ID: 'BIOMETRIC_ID',
+  FACE_PHOTO_REF: 'FACE_PHOTO_REF',
+  STUDY_ID: 'OTHER_UNIQUE_ID',
+  OTHER_UNIQUE_ID: 'OTHER_UNIQUE_ID',
+  PASSPORT: 'OTHER_UNIQUE_ID',
+  US_PASSPORT: 'OTHER_UNIQUE_ID',
+  US_ITIN: 'OTHER_UNIQUE_ID',
+});
+
+/**
+ * Parse the internal type prefix out of a `[TYPE_N]` placeholder. Returns
+ * null for malformed placeholders.
+ *
+ * Mirrors the gateway's own parsing in `extractEntityTypes`
+ * (`proxy.go:1361-1395`): require leading `[`, trailing `]`, at least one
+ * underscore, and an all-digit suffix.
+ */
+export function parsePlaceholderType(placeholder: string): string | null {
+  // Shape gate: bracketed on both ends (shortest valid form is `[A_1]`).
+  const bracketed = placeholder.startsWith('[') && placeholder.endsWith(']');
+  if (!bracketed || placeholder.length < 4) return null;
+  const inner = placeholder.slice(1, -1);
+  // Split on the LAST underscore so multi-word types such as PHONE_NUMBER survive.
+  const cut = inner.lastIndexOf('_');
+  if (cut < 1) return null;
+  const digits = inner.slice(cut + 1);
+  // The sequence suffix must be one or more ASCII digits.
+  if (!/^[0-9]+$/u.test(digits)) return null;
+  return inner.slice(0, cut);
+}
+
+/**
+ * Map a `[TYPE_N]` placeholder to its HIPAA Safe Harbor category. Returns null
+ * unless TYPE is an own key of `LUCAIRN_TO_HIPAA` (so `[constructor_1]` is null).
+ */
+export function placeholderToHipaaCategory(placeholder: string): HipaaCategory | null {
+  const t = parsePlaceholderType(placeholder);
+  if (t === null || !Object.prototype.hasOwnProperty.call(LUCAIRN_TO_HIPAA, t)) return null;
+  return LUCAIRN_TO_HIPAA[t] ?? null;
+}
diff --git a/src/index.ts b/src/index.ts
index cb0ff5c..c481b7e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -1 +1,60 @@
-export {};
+/**
+ * Public API surface for @lucairn/research methodology code.
+ *
+ * The repo is not published to npm; consumers run it from a clone. This
+ * barrel keeps the script + test imports terse and documents the supported
+ * extension points for future research papers.
+ */
+
+export {
+  HIPAA_CATEGORIES,
+  type HipaaCategory,
+  type InjectedEntity,
+  type InjectedRow,
+} from './inject-pii-core.js';
+
+export {
+  LUCAIRN_TO_HIPAA,
+  parsePlaceholderType,
+  placeholderToHipaaCategory,
+} from './hipaa-category-mapping.js';
+
+export {
+  type GatewayClient,
+  type GatewayClientOptions,
+  GatewayClientError,
+  type GatewayRequestBody,
+  type GatewayResponse,
+  type GatewayRowInput,
+  type GatewayRowResult,
+  type GroundTruthEvaluation,
+  type GroundTruthExtra,
+  type GroundTruthMatch,
+  type GroundTruthMiss,
+  type ProvingGroundAnnotation,
+  type VeilHint,
+  extractCertUrls,
+  makeGatewayClient,
+  readGatewayEnv,
+} from './gateway-client.js';
+
+export {
+  type ExtractedRedaction,
+  type RedactionVerdict,
+  extractFromEvaluation,
+  unmappedExtraTypes,
+} from './redaction-extractor.js';
+
+export {
+  type CategoryCounts,
+  type OverallCounts,
+  type PredictedSpan,
+  type RecallSummary,
+  type RowBreakdown,
+  type SpanEntity,
+  SPAN_OVERLAP_THRESHOLD,
+  aggregateExtracted,
+  computeRecallFromSpans,
+} from './recall.js';
+
+export { emitCsv, parseCsv, type CsvRow } from './csv.js';
diff --git a/src/mocks/gateway-fixtures.ts b/src/mocks/gateway-fixtures.ts
new file mode 100644
index 0000000..9dbf652
--- /dev/null
+++ b/src/mocks/gateway-fixtures.ts
@@ -0,0 +1,198 @@
+/**
+ * mocks/gateway-fixtures.ts
+ *
+ * Deterministic mock-response builders that mirror the real gateway's
+ * proving-ground response shape exactly. Tests and the `--mock` smoke
+ * scripts mount these via msw (see msw setup in test files).
+ *
+ * The shape MUST track the gateway sources cited in
+ * `src/gateway-client.ts` — any divergence is a Slice 3 hazard.
+ */
+
+import type { InjectedEntity } from '../inject-pii-core.js';
+import type {
+  GatewayResponse,
+  GroundTruthExtra,
+  GroundTruthMatch,
+  GroundTruthMiss,
+  ProvingGroundAnnotation,
+} from '../gateway-client.js';
+import { mulberry32 } from '../inject-pii-core.js';
+
+export interface MockBuilderOptions {
+  readonly rowIndex: number;
+  readonly entities: readonly InjectedEntity[];
+  /** Fraction in [0, 1] of injected entities the mock should "miss". 0 = perfect recall, 1.0 = no detections. */
+  readonly missRate?: number;
+  /** Optional fixed seed for the per-row PRNG. Default: rowIndex. */
+  readonly seed?: number;
+  /** When provided, latency_ms field is set to this value. Default: deterministic-pseudo. */
+  readonly latencyMsOverride?: number;
+  /** Synthetic spurious-redaction count, simulating false positives. */
+  readonly spuriousFpCount?: number;
+}
+
+const PLACEHOLDER_FOR_CATEGORY: Readonly<Record<string, string>> = {
+  NAME: 'PERSON',
+  GEO_SUBDIVISION: 'LOCATION',
+  DATE: 'DATE',
+  PHONE: 'PHONE_NUMBER',
+  FAX: 'FAX_NUMBER',
+  EMAIL: 'EMAIL_ADDRESS',
+  SSN: 'US_SSN',
+  MRN: 'MEDICAL_RECORD_NUMBER',
+  HEALTH_PLAN_ID: 'HEALTH_PLAN_ID',
+  ACCOUNT_NUMBER: 'ACCOUNT_NUMBER',
+  LICENSE_NUMBER: 'LICENSE_NUMBER',
+  VEHICLE_ID: 'VEHICLE_ID',
+  DEVICE_ID: 'DEVICE_ID',
+  URL: 'URL',
+  IP_ADDRESS: 'IP_ADDRESS',
+  BIOMETRIC_ID: 'BIOMETRIC_ID',
+  FACE_PHOTO_REF: 'FACE_PHOTO_REF',
+  OTHER_UNIQUE_ID: 'STUDY_ID',
+};
+
+/**
+ * Build a mock gateway response for a single row. Determinism: given the
+ * same options the output is byte-identical across runs and platforms (the
+ * miss-selection PRNG is mulberry32-seeded).
+ */
+export function buildMockResponse(options: MockBuilderOptions): GatewayResponse {
+  const missRate = clampUnit(options.missRate ?? 0);
+  const seed = options.seed ?? options.rowIndex;
+  const rng = mulberry32(seed);
+  const spuriousFpCount = Math.max(0, options.spuriousFpCount ?? 0);
+
+  const matches: GroundTruthMatch[] = [];
+  const missed: GroundTruthMiss[] = [];
+  // Deterministic per-category sequence counters for placeholder N suffix.
+  const seqByType = new Map<string, number>();
+
+  for (const e of options.entities) {
+    const draw = rng();
+    if (draw < missRate) {
+      missed.push({ field: 'transcription', type: e.category, value: e.value });
+    } else {
+      const internalType = PLACEHOLDER_FOR_CATEGORY[e.category] ?? 'OTHER';
+      const nextN = (seqByType.get(internalType) ?? 0) + 1;
+      seqByType.set(internalType, nextN);
+      matches.push({
+        annotation_type: e.category,
+        annotation_value: e.value,
+        redacted_as: `[${internalType}_${nextN}]`,
+      });
+    }
+  }
+
+  const extras: GroundTruthExtra[] = [];
+  for (let i = 0; i < spuriousFpCount; i++) {
+    // Synthesise plausible-looking spurious detections so FP-handling code
+    // paths can be exercised. Use deterministic pseudo-text.
+    const internalType = ['PERSON', 'LOCATION', 'PHONE_NUMBER'][i % 3] ?? 'PERSON';
+    const nextN = (seqByType.get(internalType) ?? 0) + 1;
+    seqByType.set(internalType, nextN);
+    extras.push({
+      placeholder: `[${internalType}_${nextN}]`,
+      original: `spurious_${seed}_${i}`,
+    });
+  }
+
+  const totalAnnotations = options.entities.length;
+  const truePositives = matches.length;
+  const falseNegatives = missed.length;
+  const falsePositives = extras.length;
+  const detectionRate =
+    totalAnnotations === 0 ? 1.0 : truePositives / totalAnnotations;
+
+  const certId = pseudoCertId(seed);
+  return {
+    request_id: `req_${seed.toString(16).padStart(8, '0')}`,
+    status: 'JOB_STATUS_COMPLETED',
+    latency_ms: options.latencyMsOverride ?? 250,
+    result: 'mock-result-omitted',
+    redaction_count: truePositives + falsePositives,
+    ground_truth_evaluation: {
+      total_annotations: totalAnnotations,
+      true_positives: truePositives,
+      false_negatives: falseNegatives,
+      false_positives: falsePositives,
+      detection_rate: detectionRate,
+      matches,
+      missed,
+      extras,
+    },
+    veil: {
+      status: 'available',
+      certificate_url: `/api/v1/veil/certificate/${certId}`,
+      summary_url: `/api/v1/veil/certificate/${certId}/summary`,
+    },
+  };
+}
+
+/**
+ * Stub helper used by test mocks to recover the per-row ground truth from a
+ * request body. Mirrors the wire shape: ground_truth.transcription is an
+ * array of ProvingGroundAnnotation.
+ */
+export function entitiesFromRequestBody(body: unknown): {
+  rowIndex: number | null;
+  entities: InjectedEntity[];
+} {
+  if (typeof body !== 'object' || body === null) {
+    return { rowIndex: null, entities: [] };
+  }
+  const obj = body as Record<string, unknown>;
+  const activity = obj['activity_id'];
+  let rowIndex: number | null = null;
+  if (typeof activity === 'string') {
+    const match = /-row-(\d+)$/u.exec(activity);
+    if (match !== null) {
+      const parsed = Number.parseInt(match[1] ?? '', 10);
+      if (Number.isFinite(parsed)) rowIndex = parsed;
+    }
+  }
+  const gtRaw = obj['ground_truth'];
+  if (typeof gtRaw !== 'object' || gtRaw === null) {
+    return { rowIndex, entities: [] };
+  }
+  const transcription = (gtRaw as Record<string, unknown>)['transcription'];
+  if (!Array.isArray(transcription)) {
+    return { rowIndex, entities: [] };
+  }
+  const entities: InjectedEntity[] = [];
+  for (const item of transcription) {
+    if (typeof item !== 'object' || item === null) continue;
+    const a = item as Partial<ProvingGroundAnnotation>;
+    if (
+      typeof a.type === 'string' &&
+      typeof a.value === 'string' &&
+      typeof a.start === 'number' &&
+      typeof a.end === 'number'
+    ) {
+      entities.push({
+        category: a.type,
+        value: a.value,
+        start_char: a.start,
+        end_char: a.end,
+      });
+    }
+  }
+  return { rowIndex, entities };
+}
+
+function clampUnit(x: number): number {
+  // Pin values outside [0, 1] to the nearest bound; in-range values (and NaN,
+  // which fails both comparisons) pass through untouched.
+  return x < 0 ? 0 : x > 1 ? 1 : x;
+}
+
+function pseudoCertId(seed: number): string {
+  // 32 hex chars, deterministic per seed: four mulberry32 draws of 8 hex digits each.
+  const rng = mulberry32(seed);
+  let out = '';
+  for (let i = 0; i < 4; i++) {
+    out += Math.floor(rng() * 0x1_0000_0000).toString(16).padStart(8, '0');
+  }
+  return out;
+}
diff --git a/src/recall.ts b/src/recall.ts
new file mode 100644
index 0000000..615f03e
--- /dev/null
+++ b/src/recall.ts
@@ -0,0 +1,359 @@
+/**
+ * recall.ts
+ *
+ * Per-HIPAA-category recall / precision / F1 aggregation.
+ *
+ * Two consumer paths:
+ *
+ *   1. `aggregateExtracted(extracted)` — the path the harness uses live.
+ *      Consumes redactions already tagged TP/FN/FP by the gateway's
+ *      proving-ground evaluator (value-containment matching, server-side).
+ *      Per-category counts are derived from `hipaa_category` on each
+ *      `ExtractedRedaction`. This path is the source of truth for any number
+ *      published in Paper 1 because the matching is performed inside the
+ *      gateway, not by code that the publisher (Lucairn) also wrote — the
+ *      arm's-length property compliance buyers care about.
+ *
+ *   2. `computeRecallFromSpans(groundTruth, predictedSpans)` — pure math
+ *      layer with span-overlap matching at the ≥50%-character-overlap
+ *      threshold locked in the Slice 2 dispatch brief. Useful when a future
+ *      gateway surface exposes raw per-entity spans inline (none does today;
+ *      see slice-2 brief lines 47-67 for the citation chain). Lets the
+ *      research repo evolve its recall semantics without re-implementing
+ *      aggregation downstream.
+ *
+ * Both paths produce the same `RecallSummary` shape.
+ */
+
+import { HIPAA_CATEGORIES, type HipaaCategory, type InjectedRow } from './inject-pii-core.js';
+import type { ExtractedRedaction } from './redaction-extractor.js';
+
+/** Minimum share of a ground-truth span that a prediction must cover to match in `computeRecallFromSpans`. Locked at 50% per Slice 2 brief. */
+export const SPAN_OVERLAP_THRESHOLD = 0.5;
+
+export interface CategoryCounts {
+  readonly tp: number; // true positives
+  readonly fp: number; // false positives
+  readonly fn: number; // false negatives
+  readonly precision: number; // tp / (tp + fp), or 0 when tp + fp is 0 (see deriveRates)
+  readonly recall: number; // tp / (tp + fn), or 0 when tp + fn is 0
+  readonly f1: number; // 2PR / (P + R), or 0 when P + R is 0
+}
+
+export interface OverallCounts {
+  readonly tp: number; // true positives summed over everything tallied
+  readonly fp: number; // false positives summed over everything tallied
+  readonly fn: number; // false negatives summed over everything tallied
+  readonly precision: number; // tp / (tp + fp), or 0 when tp + fp is 0 (see deriveRates)
+  readonly recall: number; // tp / (tp + fn), or 0 when tp + fn is 0
+  readonly f1: number; // 2PR / (P + R), or 0 when P + R is 0
+  /** Total annotations in the ground truth (TP + FN). */
+  readonly total_annotations: number;
+}
+
+export interface RowBreakdown {
+  readonly row_index: number; // row these tallies belong to
+  readonly tp: number; // true positives in this row
+  readonly fp: number; // false positives in this row
+  readonly fn: number; // false negatives in this row
+  readonly recall: number; // tp / (tp + fn), or 0 when tp + fn is 0; precision and F1 are not kept per row
+}
+
+export interface RecallSummary {
+  readonly schema_version: '1.0'; // literal type: the only value this shape allows
+  readonly generator: 'lucairn-research/recall.ts'; // literal type identifying the producer
+  readonly overall: OverallCounts; // totals and rates across all tallied verdicts
+  /** Sorted by HipaaCategory canonical order from `HIPAA_CATEGORIES`. */
+  readonly per_category: ReadonlyArray<{ category: HipaaCategory; counts: CategoryCounts }>;
+  /** Sorted by row_index ascending. */
+  readonly per_row: readonly RowBreakdown[];
+  readonly notes: readonly string[]; // caller-supplied notes; aggregateExtracted appends one for uncategorised verdicts
+}
+
+/** Spans with start ≤ end. Treated as half-open intervals [start, end). */
+export interface SpanEntity {
+  readonly category: HipaaCategory; // category credited when this entity is matched (TP) or missed (FN)
+  readonly value: string; // not read by the span matcher in this file
+  readonly start_char: number; // inclusive start offset
+  readonly end_char: number; // exclusive end offset
+}
+
+export interface PredictedSpan {
+  /** HIPAA category of the prediction, when known. Not used for matching; only used to attribute an unmatched (FP) span. */
+  readonly category?: HipaaCategory | null;
+  readonly start_char: number; // inclusive start offset
+  readonly end_char: number; // exclusive end offset
+  /** Original PHI text the sanitizer matched, when known. */
+  readonly value?: string;
+}
+
+interface MutableCategoryCounts {
+  tp: number; // running true-positive count
+  fp: number; // running false-positive count
+  fn: number; // running false-negative count
+}
+
+function emptyCategoryCounts(): MutableCategoryCounts {
+  return { tp: 0, fp: 0, fn: 0 }; // new object on every call, so tallies never share state
+}
+
+/**
+ * Compute precision, recall and F1 from raw TP/FP/FN tallies. An empty
+ * denominator — (TP+FP)==0, (TP+FN)==0, or precision+recall==0 — yields 0
+ * instead of NaN so summaries stay aggregatable when a category is absent.
+ */
+function deriveRates(tp: number, fp: number, fn: number): {
+  precision: number;
+  recall: number;
+  f1: number;
+} {
+  const ratio = (num: number, den: number): number => (den === 0 ? 0 : num / den);
+  const precision = ratio(tp, tp + fp);
+  const recall = ratio(tp, tp + fn);
+  return { precision, recall, f1: ratio(2 * precision * recall, precision + recall) };
+}
+
+function finaliseCategory(c: MutableCategoryCounts): CategoryCounts {
+  const { tp, fp, fn } = c; // raw tallies are copied; the rates are derived from them
+  return { tp, fp, fn, ...deriveRates(tp, fp, fn) };
+}
+
+/**
+ * Fold gateway-attested verdicts (tp / fp / fn) into a RecallSummary.
+ *
+ * Every record is tallied twice: once under its HIPAA category (or under an
+ * "unknown" tally when `hipaa_category` is null) and once under its
+ * `row_index`. `overall` is the sum of all category tallies plus the unknown
+ * tally; `per_category` never includes the unknown tally. When the unknown
+ * tally is non-empty, a note describing it is appended after `notes`.
+ */
+export function aggregateExtracted(
+  extracted: readonly ExtractedRedaction[],
+  notes: readonly string[] = [],
+): RecallSummary {
+  // Adds one verdict to a tally; any verdict other than 'tp' / 'fp' is an fn.
+  const tally = (counts: MutableCategoryCounts, verdict: ExtractedRedaction['verdict']): void => {
+    if (verdict === 'tp') counts.tp += 1;
+    else if (verdict === 'fp') counts.fp += 1;
+    else counts.fn += 1;
+  };
+
+  // One tally per canonical category, created up front so that categories
+  // with no verdicts still appear (zeroed) in `per_category`.
+  const byCategory = new Map<HipaaCategory, MutableCategoryCounts>();
+  for (const category of HIPAA_CATEGORIES) {
+    byCategory.set(category, emptyCategoryCounts());
+  }
+  const unknown = emptyCategoryCounts();
+  const byRow = new Map<number, MutableCategoryCounts>();
+
+  // Each record lands in exactly one category-level tally (known or unknown)
+  // and in its row's tally.
+  for (const { hipaa_category: category, verdict, row_index: rowIndex } of extracted) {
+    const known = category === null ? undefined : byCategory.get(category);
+    tally(known ?? unknown, verdict);
+
+    let rowCounts = byRow.get(rowIndex);
+    if (rowCounts === undefined) {
+      rowCounts = emptyCategoryCounts();
+      byRow.set(rowIndex, rowCounts);
+    }
+    tally(rowCounts, verdict);
+  }
+
+  // Overall totals start from the unknown tally, then add every category.
+  let totTp = unknown.tp;
+  let totFp = unknown.fp;
+  let totFn = unknown.fn;
+  const perCategory: Array<{ category: HipaaCategory; counts: CategoryCounts }> = [];
+  for (const category of HIPAA_CATEGORIES) {
+    const counts = byCategory.get(category) ?? emptyCategoryCounts();
+    totTp += counts.tp;
+    totFp += counts.fp;
+    totFn += counts.fn;
+    perCategory.push({ category, counts: finaliseCategory(counts) });
+  }
+
+  // Rows are emitted in ascending row_index order; only recall is derived.
+  const perRowOut: RowBreakdown[] = Array.from(byRow)
+    .sort(([left], [right]) => left - right)
+    .map(([row_index, { tp, fp, fn }]) => ({
+      row_index,
+      tp,
+      fp,
+      fn,
+      recall: deriveRates(tp, fp, fn).recall,
+    }));
+
+  const unknownTotal = unknown.tp + unknown.fp + unknown.fn;
+  const allNotes: string[] = [...notes];
+  if (unknownTotal > 0) {
+    allNotes.push(
+      `Encountered ${unknownTotal} verdict(s) with no HIPAA category mapping ` +
+        `(tp=${unknown.tp} fp=${unknown.fp} fn=${unknown.fn}). These are included in overall counts ` +
+        'but not in per_category. Extend src/hipaa-category-mapping.ts if these are recurring.',
+    );
+  }
+
+  return {
+    schema_version: '1.0',
+    generator: 'lucairn-research/recall.ts',
+    overall: {
+      tp: totTp,
+      fp: totFp,
+      fn: totFn,
+      total_annotations: totTp + totFn,
+      ...deriveRates(totTp, totFp, totFn),
+    },
+    per_category: perCategory,
+    per_row: perRowOut,
+    notes: allNotes,
+  };
+}
+
+/**
+ * ≥50%-character-overlap span matcher. A prediction `p` matches a ground-
+ * truth entity `g` when `(overlap_chars(p, g) / length(g)) >= 0.5`. Ties
+ * (same overlap fraction for two predictions against the same ground-truth)
+ * resolve to the earlier predicted span (lower start_char), then the
+ * smaller predicted-span length — fully deterministic.
+ *
+ * Each ground-truth entity matches at most one prediction; each prediction
+ * matches at most one ground-truth entity. Unmatched ground truth → FN.
+ * Unmatched prediction → FP.
+ *
+ * Per-category tally: ground truth is credited to its own category. An
+ * unmatched prediction is credited to its category when it has one, else to
+ * an "unknown" tally that feeds `overall` (plus a note), not `per_category`.
+ */
+export function computeRecallFromSpans(
+  groundTruth: readonly InjectedRow[],
+  predictedSpans: readonly { row_index: number; spans: readonly PredictedSpan[] }[],
+  notes: readonly string[] = [],
+): RecallSummary {
+  const predictedByRow = new Map(predictedSpans.map((p) => [p.row_index, p.spans] as const));
+
+  const perCat: Map<HipaaCategory, MutableCategoryCounts> = new Map();
+  for (const cat of HIPAA_CATEGORIES) {
+    perCat.set(cat, emptyCategoryCounts());
+  }
+  // Fallback tally for spans with no usable HIPAA category; without it an
+  // uncategorised FP would count in its row but vanish from `overall`.
+  const unknown = emptyCategoryCounts();
+  const perRow: Map<number, MutableCategoryCounts> = new Map();
+
+  for (const row of groundTruth) {
+    const truth = row.entities;
+    const preds = predictedByRow.get(row.row_index) ?? [];
+    const matched = matchSpans(truth, preds);
+
+    const rowBucket = emptyCategoryCounts();
+    for (const t of truth) {
+      const verdict = matched.truthToPred.has(t) ? 'tp' : 'fn';
+      const catBucket = perCat.get(t.category) ?? unknown;
+      catBucket[verdict] += 1;
+      rowBucket[verdict] += 1;
+    }
+    for (const p of preds) {
+      if (matched.predMatched.has(p)) continue;
+      const catBucket = (p.category != null ? perCat.get(p.category) : undefined) ?? unknown;
+      catBucket.fp += 1;
+      rowBucket.fp += 1;
+    }
+    perRow.set(row.row_index, rowBucket);
+  }
+
+  let totTp = unknown.tp;
+  let totFp = unknown.fp;
+  let totFn = unknown.fn;
+  const perCategory: Array<{ category: HipaaCategory; counts: CategoryCounts }> = [];
+  for (const cat of HIPAA_CATEGORIES) {
+    const c = perCat.get(cat) ?? emptyCategoryCounts();
+    totTp += c.tp;
+    totFp += c.fp;
+    totFn += c.fn;
+    perCategory.push({ category: cat, counts: finaliseCategory(c) });
+  }
+  const perRowOut: RowBreakdown[] = Array.from(perRow.entries())
+    .sort((a, b) => a[0] - b[0])
+    .map(([row_index, { tp, fp, fn }]) => {
+      return { row_index, tp, fp, fn, recall: deriveRates(tp, fp, fn).recall };
+    });
+
+  const unknownTotal = unknown.tp + unknown.fp + unknown.fn;
+  const allNotes: string[] = [...notes];
+  if (unknownTotal > 0) {
+    allNotes.push(
+      `Encountered ${unknownTotal} span verdict(s) with no HIPAA category ` +
+        `(tp=${unknown.tp} fp=${unknown.fp} fn=${unknown.fn}); counted in overall, not per_category.`,
+    );
+  }
+
+  return {
+    schema_version: '1.0',
+    generator: 'lucairn-research/recall.ts',
+    overall: {
+      tp: totTp,
+      fp: totFp,
+      fn: totFn,
+      total_annotations: totTp + totFn,
+      ...deriveRates(totTp, totFp, totFn),
+    },
+    per_category: perCategory,
+    per_row: perRowOut,
+    notes: allNotes,
+  };
+}
+
+interface MatchResult {
+  truthToPred: Map<SpanEntity, PredictedSpan>; // each matched ground-truth entity → the prediction assigned to it
+  predMatched: Set<PredictedSpan>; // every prediction that was assigned to some ground-truth entity
+}
+
+function matchSpans(
+  truth: readonly SpanEntity[],
+  preds: readonly PredictedSpan[],
+): MatchResult {
+  // Phase 1 — candidates: every (truth, prediction) pair whose overlap covers
+  // at least SPAN_OVERLAP_THRESHOLD of the truth span. Truth spans of zero or
+  // negative length can never qualify and are skipped outright.
+  type Candidate = { truth: SpanEntity; pred: PredictedSpan; overlapFraction: number };
+  const candidates: Candidate[] = [];
+  for (const t of truth) {
+    const truthLen = t.end_char - t.start_char;
+    if (truthLen <= 0) continue;
+    for (const p of preds) {
+      const lo = Math.max(t.start_char, p.start_char);
+      const hi = Math.min(t.end_char, p.end_char);
+      const overlapFraction = Math.max(0, hi - lo) / truthLen;
+      if (overlapFraction > 0 && overlapFraction >= SPAN_OVERLAP_THRESHOLD) {
+        candidates.push({ truth: t, pred: p, overlapFraction });
+      }
+    }
+  }
+
+  // Phase 2 — ranking: largest overlap fraction first; ties go to the
+  // prediction that starts earlier, then to the shorter prediction.
+  candidates.sort((a, b) => {
+    if (a.overlapFraction !== b.overlapFraction) {
+      return b.overlapFraction - a.overlapFraction;
+    }
+    if (a.pred.start_char !== b.pred.start_char) {
+      return a.pred.start_char - b.pred.start_char;
+    }
+    const lenA = a.pred.end_char - a.pred.start_char;
+    const lenB = b.pred.end_char - b.pred.start_char;
+    return lenA - lenB;
+  });
+
+  // Phase 3 — greedy one-to-one assignment in ranked order: a pair is taken
+  // only if neither its truth nor its prediction has been used already.
+  const truthToPred = new Map<SpanEntity, PredictedSpan>();
+  const predMatched = new Set<PredictedSpan>();
+  for (const { truth: t, pred: p } of candidates) {
+    if (truthToPred.has(t) || predMatched.has(p)) continue;
+    truthToPred.set(t, p);
+    predMatched.add(p);
+  }
+  return { truthToPred, predMatched };
+}
diff --git a/src/redaction-extractor.ts b/src/redaction-extractor.ts
new file mode 100644
index 0000000..87f3239
--- /dev/null
+++ b/src/redaction-extractor.ts
@@ -0,0 +1,127 @@
+/**
+ * redaction-extractor.ts
+ *
+ * Pure function that converts a gateway proving-ground response into a
+ * uniform per-entity record stream the recall computation can consume.
+ *
+ * Why this layer exists:
+ *   The gateway's proving-ground response carries three structured arrays —
+ *   `matches[]` (true positives), `missed[]` (false negatives), and
+ *   `extras[]` (false positives unmatched by ground truth) — keyed off the
+ *   caller-supplied annotation type. The harness needs a single flat record
+ *   per gateway-emitted decision, tagged with its HIPAA Safe Harbor category
+ *   and a verdict (TP / FN / FP), so the recall layer can aggregate
+ *   per-category without re-parsing the response shape.
+ *
+ * Cite-back: gateway emits `matches`/`missed`/`extras` at
+ *   dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:5-138.
+ */
+
+import type { HipaaCategory, HIPAA_CATEGORIES } from './inject-pii-core.js';
+import type { GroundTruthEvaluation } from './gateway-client.js';
+import { placeholderToHipaaCategory } from './hipaa-category-mapping.js';
+
+// Type-level anchor only: it references the HIPAA_CATEGORIES tuple type so the
+// type-only import above is used. The runtime array is imported separately
+// below (as HIPAA_CATEGORIES_VALUES) because `import type` bindings are erased.
+// eslint-disable-next-line @typescript-eslint/no-unused-vars
+type _CategoryTypeAnchor = (typeof HIPAA_CATEGORIES)[number];
+
+import { HIPAA_CATEGORIES as HIPAA_CATEGORIES_VALUES } from './inject-pii-core.js';
+
+const HIPAA_SET = new Set<string>(HIPAA_CATEGORIES_VALUES as readonly string[]);
+
+export type RedactionVerdict = 'tp' | 'fn' | 'fp'; // true positive / false negative / false positive
+
+export interface ExtractedRedaction {
+  readonly row_index: number; // row the evaluation belongs to, as passed to extractFromEvaluation
+  readonly hipaa_category: HipaaCategory | null; // null when the type / placeholder matches no canonical category
+  readonly verdict: RedactionVerdict;
+  /** Value the gateway compared against (ground-truth value for TP/FN; original PHI for FP). */
+  readonly value: string;
+  /** Sanitizer placeholder (e.g. `[PERSON_1]`) for TP/FP; null for FN. */
+  readonly placeholder: string | null;
+  /** Field name the gateway reported (`transcription` for the harness); set for FN only, null for TP/FP. */
+  readonly field: string | null;
+}
+
+/**
+ * Flatten one proving-ground evaluation block into per-entity records, each
+ * tagged with a HIPAA category and a verdict. Output order is all TPs, then
+ * all FNs, then all FPs, each group in the gateway's own array order.
+ *
+ * - TP (`matches[]`): category from `annotation_type`; placeholder set.
+ * - FN (`missed[]`): category from `type`; `field` set, placeholder null.
+ * - FP (`extras[]`): category from `placeholderToHipaaCategory(placeholder)`,
+ *   which may be null; the record is emitted either way.
+ */
+export function extractFromEvaluation(
+  rowIndex: number,
+  evaluation: GroundTruthEvaluation,
+): ExtractedRedaction[] {
+  const truePositives = (evaluation.matches ?? []).map(
+    (m): ExtractedRedaction => ({
+      row_index: rowIndex,
+      hipaa_category: tagAsHipaa(m.annotation_type),
+      verdict: 'tp',
+      value: m.annotation_value,
+      placeholder: m.redacted_as,
+      field: null,
+    }),
+  );
+  const falseNegatives = (evaluation.missed ?? []).map(
+    (miss): ExtractedRedaction => ({
+      row_index: rowIndex,
+      hipaa_category: tagAsHipaa(miss.type),
+      verdict: 'fn',
+      value: miss.value,
+      placeholder: null,
+      field: miss.field,
+    }),
+  );
+  const falsePositives = (evaluation.extras ?? []).map(
+    (extra): ExtractedRedaction => ({
+      row_index: rowIndex,
+      hipaa_category: placeholderToHipaaCategory(extra.placeholder),
+      verdict: 'fp',
+      value: extra.original,
+      placeholder: extra.placeholder,
+      field: null,
+    }),
+  );
+  // The `?? []` fallbacks above make a missing array behave like an empty one.
+  return [...truePositives, ...falseNegatives, ...falsePositives];
+}
+
+/**
+ * Accept a free-form string as a HipaaCategory only when HIPAA_SET contains
+ * it exactly; anything else becomes null rather than widening the type.
+ */
+function tagAsHipaa(s: string): HipaaCategory | null {
+  if (!HIPAA_SET.has(s)) return null;
+  return s as HipaaCategory;
+}
+
+/**
+ * Taxonomy-drift probe: collect the distinct inner types of the `extras[]`
+ * placeholders that `placeholderToHipaaCategory` maps to null, in first-seen
+ * order. An empty result means every extra in this evaluation is mapped.
+ * Used by `test/redaction-extractor.spec.ts`.
+ */
+export function unmappedExtraTypes(evaluation: GroundTruthEvaluation): string[] {
+  // A Set iterates in insertion order, so it gives de-duplication and the
+  // first-seen ordering in one structure.
+  const unmapped = new Set<string>();
+  for (const { placeholder } of evaluation.extras ?? []) {
+    if (placeholderToHipaaCategory(placeholder) !== null) {
+      continue;
+    }
+    // Reduce the placeholder to its type name: strip one leading '[' and one
+    // trailing ']', then a trailing "_<digits>" counter,
+    // e.g. "[PERSON_12]" → "PERSON_12" → "PERSON".
+    const withoutBrackets = placeholder.replace(/^\[|\]$/gu, '');
+    const innerType = withoutBrackets.replace(/_\d+$/u, '');
+    unmapped.add(innerType);
+  }
+  return Array.from(unmapped);
+}

From b93244c2b3b4abb51238241797fdc1694f12d2e5 Mon Sep 17 00:00:00 2001
From: Declade <110547349+Declade@users.noreply.github.com>
Date: Sun, 17 May 2026 11:30:58 +0200
Subject: [PATCH 2/5] feat(slice-2): pipeline runner, cert collector, recall
 computer + SUMMARY schema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the three CLI scripts the Slice 2 harness needs:

- scripts/run-pipeline.ts — orchestrates per-row gateway calls via
  POST /api/v1/proxy/messages (mode=proving_ground). --mock mounts an msw
  fixture server in-process; --live is reserved for Slice 3 and refuses to
  start without the explicit gate. Writes raw NDJSON to
  papers/paper-1-healthcare/raw-results/<timestamp>.ndjson (or --output).
  Supports --rows / --truth / --subset / --gateway / --api-key / --miss-rate
  / --spurious-fp-count / --activity-id-prefix.

- scripts/collect-certs.ts — walks the NDJSON, extracts cert URL + summary
  URL + redaction count + overall verdict per row, emits CERTIFICATES.csv
  via src/csv.ts::emitCsv. Columns:
    row_index, cert_url, cert_id, summary_url, overall_verdict,
    redaction_count, latency_ms, timestamp_utc, error_code

- scripts/compute-recall.ts — reads ground-truth JSONL + raw NDJSON (or
  re-runs the in-process mock when --redactions-source=mock), aggregates
  per-HIPAA-category recall / precision / F1 via aggregateExtracted, emits
  SUMMARY.json, validates against papers/_template/SUMMARY.schema.json
  in-process. Avoids a runtime dep on ajv via a minimal validator covering
  the schema subset used.

Also adds:

- papers/_template/SUMMARY.schema.json — Draft 2020-12 JSON Schema for
  SUMMARY.json. Enforces 18-category coverage in per_category, the four
  required overall fields, the RowBreakdown shape, and the schema_version /
  generator const fields. Reused by every paper in the program.
- papers/paper-1-healthcare/raw-results/.gitignore + .gitkeep — directory
  scaffold; per-run NDJSON is gitignored at the repo level
  (datasets/.gitignore line 17) but the per-paper sub-tree's own .gitignore
  locks it locally too.
- package.json — adds pipeline / collect-certs / compute-recall scripts.

End-to-end smoke (all PASS):
  pnpm run pipeline -- --rows=5 --mock --output=/tmp/slice2-smoke.ndjson
  pnpm run collect-certs -- --input=/tmp/slice2-smoke.ndjson --output=/tmp/slice2-CERTIFICATES.csv
  pnpm run compute-recall -- --truth=ground-truth.jsonl --redactions-source=mock --rows=5 --output=/tmp/slice2-SUMMARY.json
---
 package.json                         |   5 +-
 papers/_template/SUMMARY.schema.json | 126 +++++++++
 scripts/collect-certs.ts             | 163 ++++++++++++
 scripts/compute-recall.ts            | 359 +++++++++++++++++++++++++
 scripts/run-pipeline.ts              | 374 +++++++++++++++++++++++++++
 5 files changed, 1026 insertions(+), 1 deletion(-)
 create mode 100644 papers/_template/SUMMARY.schema.json
 create mode 100644 scripts/collect-certs.ts
 create mode 100644 scripts/compute-recall.ts
 create mode 100644 scripts/run-pipeline.ts

diff --git a/package.json b/package.json
index 5279c6e..2c2e8c9 100644
--- a/package.json
+++ b/package.json
@@ -20,7 +20,10 @@
     "test:watch": "vitest",
     "dataset:download": "node --import tsx scripts/download-mtsamples.ts",
     "dataset:inject-pii": "node --import tsx scripts/inject-pii.ts",
-    "dataset:verify-injection": "node --import tsx scripts/verify-injection.ts"
+    "dataset:verify-injection": "node --import tsx scripts/verify-injection.ts",
+    "pipeline": "node --import tsx scripts/run-pipeline.ts",
+    "collect-certs": "node --import tsx scripts/collect-certs.ts",
+    "compute-recall": "node --import tsx scripts/compute-recall.ts"
   },
   "devDependencies": {
     "@faker-js/faker": "^9.0.0",
diff --git a/papers/_template/SUMMARY.schema.json b/papers/_template/SUMMARY.schema.json
new file mode 100644
index 0000000..f1a2f4f
--- /dev/null
+++ b/papers/_template/SUMMARY.schema.json
@@ -0,0 +1,126 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "$id": "https://github.com/Declade/lucairn-research/papers/_template/SUMMARY.schema.json",
+  "title": "Lucairn Research Program — per-paper SUMMARY.json",
+  "description": "Aggregate recall / precision / F1 numbers per HIPAA Safe Harbor category + overall + per-row breakdown for any paper in the Lucairn Research Program. Mirrors the RecallSummary shape produced by src/recall.ts.",
+  "type": "object",
+  "required": [
+    "schema_version",
+    "generator",
+    "overall",
+    "per_category",
+    "per_row",
+    "notes"
+  ],
+  "additionalProperties": false,
+  "properties": {
+    "schema_version": {
+      "type": "string",
+      "const": "1.0"
+    },
+    "generator": {
+      "type": "string",
+      "const": "lucairn-research/recall.ts"
+    },
+    "overall": {
+      "$ref": "#/$defs/OverallCounts"
+    },
+    "per_category": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "required": ["category", "counts"],
+        "additionalProperties": false,
+        "properties": {
+          "category": {
+            "$ref": "#/$defs/HipaaCategory"
+          },
+          "counts": {
+            "$ref": "#/$defs/CategoryCounts"
+          }
+        }
+      },
+      "minItems": 18,
+      "maxItems": 18
+    },
+    "per_row": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "required": ["row_index", "tp", "fp", "fn", "recall"],
+        "additionalProperties": false,
+        "properties": {
+          "row_index": { "type": "integer", "minimum": 0 },
+          "tp": { "type": "integer", "minimum": 0 },
+          "fp": { "type": "integer", "minimum": 0 },
+          "fn": { "type": "integer", "minimum": 0 },
+          "recall": { "type": "number", "minimum": 0, "maximum": 1 }
+        }
+      }
+    },
+    "notes": {
+      "type": "array",
+      "items": { "type": "string" }
+    }
+  },
+  "$defs": {
+    "HipaaCategory": {
+      "type": "string",
+      "enum": [
+        "NAME",
+        "GEO_SUBDIVISION",
+        "DATE",
+        "PHONE",
+        "FAX",
+        "EMAIL",
+        "SSN",
+        "MRN",
+        "HEALTH_PLAN_ID",
+        "ACCOUNT_NUMBER",
+        "LICENSE_NUMBER",
+        "VEHICLE_ID",
+        "DEVICE_ID",
+        "URL",
+        "IP_ADDRESS",
+        "BIOMETRIC_ID",
+        "FACE_PHOTO_REF",
+        "OTHER_UNIQUE_ID"
+      ]
+    },
+    "CategoryCounts": {
+      "type": "object",
+      "required": ["tp", "fp", "fn", "precision", "recall", "f1"],
+      "additionalProperties": false,
+      "properties": {
+        "tp": { "type": "integer", "minimum": 0 },
+        "fp": { "type": "integer", "minimum": 0 },
+        "fn": { "type": "integer", "minimum": 0 },
+        "precision": { "type": "number", "minimum": 0, "maximum": 1 },
+        "recall": { "type": "number", "minimum": 0, "maximum": 1 },
+        "f1": { "type": "number", "minimum": 0, "maximum": 1 }
+      }
+    },
+    "OverallCounts": {
+      "type": "object",
+      "required": [
+        "tp",
+        "fp",
+        "fn",
+        "total_annotations",
+        "precision",
+        "recall",
+        "f1"
+      ],
+      "additionalProperties": false,
+      "properties": {
+        "tp": { "type": "integer", "minimum": 0 },
+        "fp": { "type": "integer", "minimum": 0 },
+        "fn": { "type": "integer", "minimum": 0 },
+        "total_annotations": { "type": "integer", "minimum": 0 },
+        "precision": { "type": "number", "minimum": 0, "maximum": 1 },
+        "recall": { "type": "number", "minimum": 0, "maximum": 1 },
+        "f1": { "type": "number", "minimum": 0, "maximum": 1 }
+      }
+    }
+  }
+}
diff --git a/scripts/collect-certs.ts b/scripts/collect-certs.ts
new file mode 100644
index 0000000..0e71d7e
--- /dev/null
+++ b/scripts/collect-certs.ts
@@ -0,0 +1,163 @@
+/**
+ * collect-certs.ts
+ *
+ * Walks the NDJSON output of `run-pipeline.ts`, extracts each row's cert URL
+ * + summary URL + recall metadata, and emits a `CERTIFICATES.csv` appendix
+ * suitable for the paper.
+ *
+ * Columns:
+ *   row_index, cert_url, cert_id, summary_url, overall_verdict,
+ *   redaction_count, latency_ms, timestamp_utc, error_code
+ *
+ * `overall_verdict` is a per-row tag derived from the gateway-attested
+ * ground_truth_evaluation:
+ *   - "verified" when total_annotations > 0 and detection_rate == 1.0
+ *   - "partial" when total_annotations > 0 and 0 < detection_rate < 1.0
+ *   - "miss" when total_annotations > 0 and detection_rate == 0
+ *   - "n/a" when there is no evaluation block or total_annotations == 0
+ *   - "error" when the row carried an error block
+ *
+ * Cert ID is parsed from the certificate_url's final path segment to keep
+ * the CSV readable without re-parsing URLs.
+ */
+
+import { readFile, writeFile } from 'node:fs/promises';
+
+import { emitCsv } from '../src/csv.js';
+import type { GroundTruthEvaluation } from '../src/gateway-client.js';
+
+interface PipelineNdjsonRecord {
+  row_index: number; // written to the row_index CSV column
+  timestamp_utc: string; // copied verbatim to the timestamp_utc CSV column
+  entities_submitted?: number; // not read by this script
+  transcription_length?: number; // not read by this script
+  gateway?: string; // not read by this script
+  mode?: 'mock' | 'live'; // not read by this script
+  result: {
+    row_index: number; // not read by this script; the top-level row_index is used
+    request_id: string; // not read by this script
+    cert_url: string | null; // source of both the cert_url and cert_id columns
+    summary_url: string | null;
+    evaluation: GroundTruthEvaluation | null; // drives overall_verdict via classifyVerdict
+    redaction_count: number | null; // null becomes an empty CSV cell
+    latency_ms: number | null; // null becomes an empty CSV cell
+  } | null; // a null result is tolerated: every read in main goes through optional chaining
+  error: { code: string; message: string } | null; // non-null makes overall_verdict "error"
+}
+
+interface CliArgs {
+  input: string;
+  output: string;
+}
+
+/**
+ * Parse `--input=<path>` and `--output=<path>`; both are mandatory. A lone
+ * `--` and empty arguments are ignored, `--help` / `-h` prints usage and
+ * exits with status 0, and any other argument throws.
+ */
+function parseArgs(argv: readonly string[]): CliArgs {
+  const parsed: CliArgs = { input: '', output: '' };
+  for (const raw of argv) {
+    // Split at the first '=' only, so values may themselves contain '='.
+    const eq = raw.indexOf('=');
+    const key = eq === -1 ? raw : raw.slice(0, eq);
+    const val = eq === -1 ? '' : raw.slice(eq + 1);
+    if (key === '--input') {
+      parsed.input = val;
+    } else if (key === '--output') {
+      parsed.output = val;
+    } else if (key === '--help' || key === '-h') {
+      process.stdout.write(
+        'Usage: pnpm run collect-certs -- --input=<run.ndjson> --output=<CERTIFICATES.csv>\n',
+      );
+      process.exit(0);
+    } else if (raw.length > 0 && raw !== '--') {
+      throw new Error(`unknown argument: ${raw}`);
+    }
+  }
+  // An omitted flag and an empty value ("--input=") look the same here.
+  if (parsed.input === '' || parsed.output === '') {
+    throw new Error('--input and --output are required');
+  }
+  return parsed;
+}
+
+function classifyVerdict(
+  evaluation: GroundTruthEvaluation | null,
+  error: { code: string } | null,
+): string {
+  // An error block wins over everything; no evaluation or no annotations → n/a.
+  if (error !== null) return 'error';
+  if (evaluation === null || evaluation.total_annotations === 0) return 'n/a';
+  const rate = evaluation.detection_rate;
+  // Rates at or beyond the ends of [0, 1] fold into 'verified' / 'miss'.
+  return rate >= 1 ? 'verified' : rate <= 0 ? 'miss' : 'partial';
+}
+
+function extractCertIdFromUrl(certUrl: string | null): string {
+  if (certUrl === null) return '';
+  // Ignore trailing slashes, then keep whatever follows the last remaining '/'.
+  const segments = certUrl.replace(/\/+$/u, '').split('/');
+  return segments[segments.length - 1] ?? '';
+}
+
+/**
+ * Read the NDJSON named by --input, turn each record into one CSV row, and
+ * write the result to --output. Blank lines are skipped; a line that is not
+ * valid JSON aborts the run with its 1-based line number.
+ */
+async function main(): Promise<void> {
+  const cli = parseArgs(process.argv.slice(2));
+  const text = await readFile(cli.input, 'utf8');
+
+  // Parse line by line (rather than the whole text at once) so that a
+  // malformed line can be reported together with its line number.
+  const records: PipelineNdjsonRecord[] = [];
+  text.split('\n').forEach((line, idx) => {
+    const trimmed = line.trim();
+    if (trimmed === '') return;
+    try {
+      records.push(JSON.parse(trimmed) as PipelineNdjsonRecord);
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      throw new Error(`${cli.input}: line ${idx + 1} is not valid JSON: ${reason}`);
+    }
+  });
+
+  // Column order of the emitted CSV.
+  const headers = [
+    'row_index',
+    'cert_url',
+    'cert_id',
+    'summary_url',
+    'overall_verdict',
+    'redaction_count',
+    'latency_ms',
+    'timestamp_utc',
+    'error_code',
+  ];
+  // One CSV row per record; optional chaining covers records with result = null.
+  // Absent or null numeric cells are written as empty strings.
+  const cell = (n: number | null | undefined): string => (n == null ? '' : String(n));
+  const rows = records.map(({ row_index, timestamp_utc, result, error }) => ({
+    row_index: String(row_index),
+    cert_url: result?.cert_url ?? '',
+    cert_id: extractCertIdFromUrl(result?.cert_url ?? null),
+    summary_url: result?.summary_url ?? '',
+    overall_verdict: classifyVerdict(result?.evaluation ?? null, error),
+    redaction_count: cell(result?.redaction_count),
+    latency_ms: cell(result?.latency_ms),
+    timestamp_utc,
+    error_code: error?.code ?? '',
+  }));
+
+  await writeFile(cli.output, emitCsv(headers, rows), 'utf8');
+  process.stdout.write(`wrote ${rows.length} cert row(s) to ${cli.output}\n`);
+}
+
+// Entry point: any failure is reported on stderr and exits with status 1.
+main().catch((failure: unknown) => {
+  const text = failure instanceof Error ? failure.message : String(failure);
+  process.stderr.write(`collect-certs: ${text}\n`);
+  process.exit(1);
+});
diff --git a/scripts/compute-recall.ts b/scripts/compute-recall.ts
new file mode 100644
index 0000000..801dbb1
--- /dev/null
+++ b/scripts/compute-recall.ts
@@ -0,0 +1,359 @@
+/**
+ * compute-recall.ts
+ *
+ * Reads the harness's NDJSON output, extracts per-row gateway-attested
+ * ground_truth_evaluation blocks, and emits a `SUMMARY.json` aggregate
+ * recall / precision / F1 file. Validates the emitted JSON against
+ * `papers/_template/SUMMARY.schema.json`.
+ *
+ * Two input modes:
+ *
+ *   --redactions-source=ndjson  (default): reads --input NDJSON, uses the
+ *     gateway-attested evaluation blocks. This is the live path.
+ *   --redactions-source=mock: re-runs the in-process mock against the
+ *     ground-truth file, useful for math-only smoke without spinning up the
+ *     full run-pipeline harness. Configurable via --miss-rate and
+ *     --spurious-fp-count.
+ *
+ * Determinism: identical inputs produce byte-identical output (sort orders
+ * fixed: per_category in HIPAA_CATEGORIES order, per_row by row_index asc).
+ */
+
+import { readFile, writeFile } from 'node:fs/promises';
+import { fileURLToPath } from 'node:url';
+import { dirname, resolve } from 'node:path';
+
+import {
+  type GroundTruthEvaluation,
+  extractFromEvaluation,
+} from '../src/index.js';
+import { aggregateExtracted } from '../src/recall.js';
+import type { ExtractedRedaction } from '../src/redaction-extractor.js';
+import { buildMockResponse } from '../src/mocks/gateway-fixtures.js';
+import type { InjectedEntity } from '../src/inject-pii-core.js';
+
+interface CliArgs { // parsed command-line options for compute-recall
+  truth: string; // --truth: ground-truth JSONL path
+  input: string | null; // --input: pipeline NDJSON path; null until supplied (required for the ndjson source)
+  redactionsSource: 'ndjson' | 'mock'; // --redactions-source
+  output: string; // --output: where the summary JSON is written
+  rows: number | null; // --rows: process only the first N ground-truth rows; null means all
+  missRate: number; // --miss-rate: value in [0, 1] passed to the mock builder
+  spuriousFpCount: number; // --spurious-fp-count: non-negative integer passed to the mock builder
+}
+
+/**
+ * Parse `--key=value` CLI flags into CliArgs. Integer flags must be plain decimal
+ * digits ("5abc" / "1.5" are rejected, not truncated). Throws Error on unknown
+ * flags, bad values, or a missing --input in ndjson mode; --help / -h exits 0.
+ */
+function parseArgs(argv: readonly string[]): CliArgs {
+  const args: CliArgs = {
+    truth: 'datasets/healthcare/with-injected-pii/ground-truth.jsonl',
+    input: null,
+    redactionsSource: 'ndjson',
+    output: 'papers/paper-1-healthcare/SUMMARY.json',
+    rows: null,
+    missRate: 0,
+    spuriousFpCount: 0,
+  };
+  // Number.parseInt alone would accept trailing garbage; require digits only.
+  const nonNegativeInt = (flag: string, val: string): number => {
+    if (!/^\d+$/u.test(val)) throw new Error(`${flag} must be a non-negative integer`);
+    return Number.parseInt(val, 10);
+  };
+  for (const raw of argv) {
+    const eq = raw.indexOf('=');
+    const key = eq === -1 ? raw : raw.slice(0, eq);
+    const val = eq === -1 ? '' : raw.slice(eq + 1);
+    switch (key) {
+      case '--truth':
+        args.truth = val;
+        break;
+      case '--input':
+        args.input = val;
+        break;
+      case '--redactions-source':
+        if (val !== 'ndjson' && val !== 'mock') {
+          throw new Error('--redactions-source must be "ndjson" or "mock"');
+        }
+        args.redactionsSource = val;
+        break;
+      case '--output':
+        args.output = val;
+        break;
+      case '--rows':
+        args.rows = nonNegativeInt('--rows', val);
+        break;
+      case '--miss-rate': {
+        const f = val.trim() === '' ? Number.NaN : Number(val); // '' must not read as 0
+        if (!Number.isFinite(f) || f < 0 || f > 1) {
+          throw new Error('--miss-rate must be in [0, 1]');
+        }
+        args.missRate = f;
+        break;
+      }
+      case '--spurious-fp-count':
+        args.spuriousFpCount = nonNegativeInt('--spurious-fp-count', val);
+        break;
+      case '--help':
+      case '-h':
+        process.stdout.write(
+          'Usage: pnpm run compute-recall -- --truth=<ground-truth.jsonl> ' +
+            '[--input=<run.ndjson> | --redactions-source=mock] [--rows=N] --output=<SUMMARY.json>\n',
+        );
+        process.exit(0);
+        break;
+      default:
+        if (raw.length > 0 && raw !== '--') {
+          throw new Error(`unknown argument: ${raw}`);
+        }
+    }
+  }
+  if (args.redactionsSource === 'ndjson' && args.input === null) {
+    throw new Error('--input is required when --redactions-source=ndjson');
+  }
+  return args;
+}
+
+/**
+ * Load ground-truth JSONL (one `{row_index, entities}` object per non-blank
+ * line) and return the rows sorted by row_index, so `--rows=N` selects the same
+ * rows run-pipeline submits. Entity items lacking a string category/value or a
+ * numeric start_char/end_char are dropped.
+ * @throws Error naming the line when it is not JSON or lacks row_index/entities.
+ */
+async function loadGroundTruth(
+  path: string,
+): Promise<Array<{ row_index: number; entities: InjectedEntity[] }>> {
+  const text = await readFile(path, 'utf8');
+  const out: Array<{ row_index: number; entities: InjectedEntity[] }> = [];
+  let lineNo = 0;
+  for (const ln of text.split('\n')) {
+    lineNo += 1;
+    const trimmed = ln.trim();
+    if (trimmed === '') continue;
+    let parsed: { row_index: unknown; entities: unknown } | null;
+    try {
+      parsed = JSON.parse(trimmed) as { row_index: unknown; entities: unknown } | null;
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      throw new Error(`${path}: line ${lineNo} not JSON: ${reason}`);
+    }
+    // Fail loudly: a silently skipped row would drop its results from the summary.
+    if (parsed === null || typeof parsed.row_index !== 'number' || !Array.isArray(parsed.entities)) {
+      throw new Error(`${path}: line ${lineNo} missing row_index or entities`);
+    }
+    const entities: InjectedEntity[] = [];
+    for (const item of parsed.entities as unknown[]) {
+      if (typeof item !== 'object' || item === null) continue;
+      const { category, value, start_char, end_char } = item as Partial<InjectedEntity>;
+      if (typeof category !== 'string' || typeof value !== 'string') continue;
+      if (typeof start_char !== 'number' || typeof end_char !== 'number') continue;
+      entities.push({ category, value, start_char, end_char });
+    }
+    out.push({ row_index: parsed.row_index, entities });
+  }
+  return out.sort((a, b) => a.row_index - b.row_index);
+}
+
+/**
+ * Index the `result.evaluation` blocks of a pipeline NDJSON file by row_index.
+ * Records without a numeric row_index, an object result, or an evaluation are skipped.
+ */
+async function loadEvaluationsFromNdjson(
+  path: string,
+): Promise<Map<number, GroundTruthEvaluation>> {
+  const byRow = new Map<number, GroundTruthEvaluation>();
+  const lines = (await readFile(path, 'utf8')).split('\n');
+  for (const [idx, rawLine] of lines.entries()) {
+    const body = rawLine.trim();
+    if (body.length === 0) continue;
+    let record: { row_index?: unknown; result?: unknown };
+    try {
+      record = JSON.parse(body) as { row_index?: unknown; result?: unknown };
+    } catch (cause) {
+      const why = cause instanceof Error ? cause.message : String(cause);
+      throw new Error(`${path}: line ${idx + 1} not JSON: ${why}`);
+    }
+    const rowIndex = record.row_index;
+    const payload = record.result;
+    if (typeof rowIndex !== 'number' || typeof payload !== 'object' || payload === null) continue;
+    const evaluation = (payload as { evaluation?: GroundTruthEvaluation | null }).evaluation;
+    if (evaluation != null) byRow.set(rowIndex, evaluation);
+  }
+  return byRow;
+}
+
+/**
+ * Validate `json` against the JSON Schema file at `schemaPath` with the minimal
+ * in-process validator (validateNode: the Draft 2020-12 subset SUMMARY.schema.json
+ * uses, avoiding a runtime dep on ajv). Rejects on the first failure.
+ */
+async function validateAgainstSchema(json: unknown, schemaPath: string): Promise<void> {
+  const schema = JSON.parse(await readFile(schemaPath, 'utf8')) as unknown;
+  // validateNode latches the first schema it sees as the $ref root; clear it so
+  // a later call with a different schema cannot resolve against stale $defs.
+  SCHEMA_ROOT = null;
+  validateNode(json, schema, '#');
+}
+
+interface Schema { // the JSON Schema keywords validateNode reads
+  type?: string; // matched against type names in validateNode; other values are not enforced
+  required?: readonly string[];
+  additionalProperties?: boolean; // only an explicit `false` is enforced
+  properties?: Readonly<Record<string, Schema>>;
+  $ref?: string; // resolved by stripping a leading "#/$defs/" and looking the rest up in $defs
+  $defs?: Readonly<Record<string, Schema>>;
+  enum?: readonly unknown[]; // membership tested with Array.prototype.includes
+  const?: unknown; // compared with !==
+  minimum?: number; // applied only when the value is a number
+  maximum?: number; // applied only when the value is a number
+  minItems?: number;
+  maxItems?: number;
+  items?: Schema; // one schema applied to every array element
+}
+
+function getDefs(schema: Schema, root: Schema | null): Schema['$defs'] | undefined {
+  return (root == null ? undefined : root.$defs) ?? schema.$defs; // root $defs win; else the node's own
+}
+
+let SCHEMA_ROOT: Schema | null = null; // first schema node validateNode sees; passed to getDefs as the $defs root
+
+/**
+ * Recursively check `node` against `schema`, throwing an Error that names the
+ * failing location (`ptr`) on the first violation. Handles $ref into the root
+ * schema's $defs, object (required / properties / additionalProperties: false),
+ * array (minItems / maxItems / items), integer, number, string, boolean,
+ * minimum, maximum, const and enum. A non-object schema accepts anything.
+ * The first schema node seen is remembered in SCHEMA_ROOT as the $defs root.
+ */
+function validateNode(node: unknown, schema: unknown, ptr: string): void {
+  if (typeof schema !== 'object' || schema === null) return;
+  const s = schema as Schema;
+  if (SCHEMA_ROOT === null) SCHEMA_ROOT = s;
+  // Own-property tests only: `in` also matches Object.prototype members such
+  // as "constructor", which would hide a missing or unexpected property.
+  const owns = (o: object, k: string): boolean => Object.prototype.hasOwnProperty.call(o, k);
+  if (s.$ref !== undefined) {
+    const defs: Readonly<Record<string, Schema>> = getDefs(s, SCHEMA_ROOT) ?? {};
+    const refName = s.$ref.replace(/^#\/\$defs\//u, '');
+    if (!owns(defs, refName)) {
+      throw new Error(`schema: unresolved $ref ${s.$ref} at ${ptr}`);
+    }
+    validateNode(node, defs[refName], ptr);
+    return;
+  }
+  if (s.type === 'object') {
+    if (typeof node !== 'object' || node === null || Array.isArray(node)) {
+      throw new Error(`schema: expected object at ${ptr}, got ${typeof node}`);
+    }
+    const obj = node as Record<string, unknown>;
+    for (const req of s.required ?? []) {
+      if (!owns(obj, req)) {
+        throw new Error(`schema: missing required property "${req}" at ${ptr}`);
+      }
+    }
+    const props = s.properties ?? {};
+    if (s.additionalProperties === false) {
+      for (const k of Object.keys(obj)) {
+        if (!owns(props, k)) {
+          throw new Error(`schema: unexpected property "${k}" at ${ptr}`);
+        }
+      }
+    }
+    for (const [k, sub] of Object.entries(props)) {
+      if (owns(obj, k)) validateNode(obj[k], sub, `${ptr}/${k}`);
+    }
+    return;
+  }
+  if (s.type === 'array') {
+    if (!Array.isArray(node)) {
+      throw new Error(`schema: expected array at ${ptr}`);
+    }
+    if (s.minItems !== undefined && node.length < s.minItems) {
+      throw new Error(`schema: array at ${ptr} has ${node.length} items, min ${s.minItems}`);
+    }
+    if (s.maxItems !== undefined && node.length > s.maxItems) {
+      throw new Error(`schema: array at ${ptr} has ${node.length} items, max ${s.maxItems}`);
+    }
+    if (s.items) {
+      for (let i = 0; i < node.length; i++) {
+        validateNode(node[i], s.items, `${ptr}/${i}`);
+      }
+    }
+    return;
+  }
+  // Scalar type checks fall through so minimum / maximum / const / enum still apply.
+  if (s.type === 'integer' && !Number.isInteger(node)) throw new Error(`schema: expected integer at ${ptr}`);
+  if (s.type === 'number' && !Number.isFinite(node)) throw new Error(`schema: expected number at ${ptr}`);
+  if (s.type === 'string' && typeof node !== 'string') throw new Error(`schema: expected string at ${ptr}`);
+  if (s.type === 'boolean' && typeof node !== 'boolean') throw new Error(`schema: expected boolean at ${ptr}`);
+  if (s.minimum !== undefined && typeof node === 'number' && node < s.minimum) {
+    throw new Error(`schema: ${ptr} below minimum ${s.minimum} (got ${node})`);
+  }
+  if (s.maximum !== undefined && typeof node === 'number' && node > s.maximum) {
+    throw new Error(`schema: ${ptr} above maximum ${s.maximum} (got ${node})`);
+  }
+  if (s.const !== undefined && node !== s.const) {
+    throw new Error(`schema: ${ptr} expected const ${JSON.stringify(s.const)}, got ${JSON.stringify(node)}`);
+  }
+  if (s.enum !== undefined && !s.enum.includes(node)) {
+    throw new Error(`schema: ${ptr} value ${JSON.stringify(node)} not in enum`);
+  }
+}
+
+function defaultSchemaPath(): string {
+  const scriptFile = fileURLToPath(import.meta.url); // anchor on this script's location, not the cwd
+  return resolve(scriptFile, '..', '..', 'papers', '_template', 'SUMMARY.schema.json');
+}
+
+/**
+ * Entry point: load ground truth, collect per-row evaluations (built by the
+ * in-process mock, or read from the --input NDJSON), aggregate, write --output.
+ */
+async function main(): Promise<void> {
+  const cli = parseArgs(process.argv.slice(2));
+  const groundTruth = await loadGroundTruth(cli.truth);
+  const targetRows = groundTruth.slice(0, cli.rows ?? groundTruth.length);
+  const extracted: ExtractedRedaction[] = [];
+  if (cli.redactionsSource === 'mock') {
+    for (const row of targetRows) {
+      const mockResponse = buildMockResponse({
+        rowIndex: row.row_index,
+        entities: row.entities,
+        missRate: cli.missRate,
+        spuriousFpCount: cli.spuriousFpCount,
+      });
+      const evaluation = mockResponse.ground_truth_evaluation;
+      if (evaluation !== undefined) extracted.push(...extractFromEvaluation(row.row_index, evaluation));
+    }
+  } else {
+    if (cli.input === null) {
+      throw new Error('--input is required when --redactions-source=ndjson');
+    }
+    const evals = await loadEvaluationsFromNdjson(cli.input);
+    for (const row of targetRows) {
+      const evaluation = evals.get(row.row_index);
+      if (evaluation !== undefined) extracted.push(...extractFromEvaluation(row.row_index, evaluation));
+    }
+  }
+  const summary = aggregateExtracted(extracted, [
+    `Source: ${cli.redactionsSource}; rows processed: ${targetRows.length}.`,
+  ]);
+  // Validate BEFORE writing so a schema failure never leaves an invalid file on disk.
+  await validateAgainstSchema(summary, defaultSchemaPath());
+  await writeFile(cli.output, JSON.stringify(summary, null, 2) + '\n', 'utf8');
+  process.stdout.write(
+    `wrote SUMMARY.json (${targetRows.length} rows, ` +
+      `overall recall=${summary.overall.recall.toFixed(4)}, ` +
+      `f1=${summary.overall.f1.toFixed(4)}) to ${cli.output}\n`,
+  );
+}
+
+main().catch((failure: unknown) => { // report any failure on stderr, then exit non-zero
+  const detail = failure instanceof Error ? failure.message : String(failure);
+  process.stderr.write(`compute-recall: ${detail}\n`);
+  process.exit(1);
+});
diff --git a/scripts/run-pipeline.ts b/scripts/run-pipeline.ts
new file mode 100644
index 0000000..af0f43c
--- /dev/null
+++ b/scripts/run-pipeline.ts
@@ -0,0 +1,374 @@
+/**
+ * run-pipeline.ts
+ *
+ * Slice 2 harness — call the Lucairn gateway row-by-row over the
+ * Measurement-B 500-row subset (or a smaller --rows slice), recording each
+ * gateway response to an NDJSON file under `papers/paper-1-healthcare/raw-
+ * results/`. Designed to run in two modes:
+ *
+ *   - LIVE (--live): hits a real gateway at LUCAIRN_GATEWAY_URL with a
+ *     LUCAIRN_API_KEY. Live runs are deferred to Slice 3 per the locked
+ *     halt gate. Do not run live by accident — the script refuses to start
+ *     unless exactly one of --mock / --live is passed explicitly.
+ *   - MOCK (--mock): mounts a deterministic msw fixture server in-process.
+ *     The harness sends its requests to the placeholder gateway URL
+ *     (MOCK_GATEWAY_URL) that the msw handler intercepts. No network egress.
+ *     The mock honours `--miss-rate` and `--spurious-fp-count` so smoke tests
+ *     can drive recall paths against a known oracle.
+ *
+ * Usage:
+ *   pnpm run pipeline -- --rows=5 --mock --output=/tmp/slice2-smoke.ndjson
+ *   pnpm run pipeline -- --rows=500 --mock --output=papers/paper-1-healthcare/raw-results/mock-500.ndjson
+ *   pnpm run pipeline -- --live --rows=20    # Slice 3 only
+ */
+
+import { mkdir, readFile, writeFile } from 'node:fs/promises';
+import { existsSync } from 'node:fs';
+import { dirname, resolve } from 'node:path';
+import { setupServer } from 'msw/node';
+import { http, HttpResponse } from 'msw';
+
+import {
+  GatewayClientError,
+  type GatewayRowResult,
+  makeGatewayClient,
+  readGatewayEnv,
+} from '../src/gateway-client.js';
+import type { InjectedEntity } from '../src/inject-pii-core.js';
+import { parseCsv } from '../src/csv.js';
+import { buildMockResponse, entitiesFromRequestBody } from '../src/mocks/gateway-fixtures.js';
+
+const DEFAULT_TRUTH_PATH =
+  'datasets/healthcare/with-injected-pii/ground-truth.jsonl'; // default for --truth
+const DEFAULT_SUBSET_PATH =
+  'datasets/healthcare/with-injected-pii/measurement-b-subset.csv'; // default for --subset
+const MOCK_GATEWAY_URL = 'http://mock.lucairn.local'; // base URL the msw handler is registered under; used only with --mock
+const MOCK_API_KEY = 'lcr_live_mock_0000000000000000000000000000'; // placeholder key handed to the client under --mock
+
+interface CliArgs { // parsed command-line options for run-pipeline
+  rows: number | null; // --rows: run only the first N row indices (ascending); null means all
+  mock: boolean; // --mock: answer requests with the in-process msw mock
+  live: boolean; // --live: call the configured gateway
+  truth: string; // --truth: ground-truth JSONL path
+  subset: string; // --subset: Measurement-B subset CSV path
+  output: string; // --output: NDJSON destination
+  gateway: string | null; // --gateway: URL override, consulted only in live mode
+  apiKey: string | null; // --api-key: key override, consulted only in live mode
+  missRate: number; // --miss-rate: value in [0, 1] forwarded to the mock
+  spuriousFpCount: number; // --spurious-fp-count: non-negative integer forwarded to the mock
+  activityIdPrefix: string; // --activity-id-prefix: passed to makeGatewayClient
+}
+
+/**
+ * Turn `--flag` / `--flag=value` tokens into a CliArgs record, starting from the
+ * defaults below (the default output name embeds the current UTC timestamp).
+ *
+ * `--help` / `-h` prints usage and exits 0. Any other unrecognised, non-empty
+ * token except a bare `--` throws. Whether --mock and --live were combined is
+ * not checked here.
+ */
+function parseArgs(argv: readonly string[]): CliArgs {
+  const stamp = new Date().toISOString().replace(/[:.]/gu, '-');
+  const parsed: CliArgs = {
+    rows: null,
+    mock: false,
+    live: false,
+    truth: DEFAULT_TRUTH_PATH,
+    subset: DEFAULT_SUBSET_PATH,
+    output: `papers/paper-1-healthcare/raw-results/run-${stamp}.ndjson`,
+    gateway: null,
+    apiKey: null,
+    missRate: 0,
+    spuriousFpCount: 0,
+    activityIdPrefix: 'paper-1-healthcare',
+  };
+
+  for (const token of argv) {
+    const cut = token.indexOf('=');
+    const flag = cut === -1 ? token : token.slice(0, cut);
+    const value = cut === -1 ? '' : token.slice(cut + 1);
+
+    // One branch per recognised flag; boolean switches ignore any `=value`
+    // part, and numeric flags are validated by the shared helpers.
+    if (flag === '--rows') {
+      parsed.rows = parseIntOrThrow(value, '--rows');
+    } else if (flag === '--mock') {
+      parsed.mock = true;
+    } else if (flag === '--live') {
+      parsed.live = true;
+    } else if (flag === '--truth') {
+      parsed.truth = value;
+    } else if (flag === '--subset') {
+      parsed.subset = value;
+    } else if (flag === '--output') {
+      parsed.output = value;
+    } else if (flag === '--gateway') {
+      // Stored as given; an empty value stays '' rather than null.
+      parsed.gateway = value;
+    } else if (flag === '--api-key') {
+      // Stored as given; an empty value stays '' rather than null.
+      parsed.apiKey = value;
+    } else if (flag === '--miss-rate') {
+      parsed.missRate = parseFloatOrThrow(value, '--miss-rate');
+    } else if (flag === '--spurious-fp-count') {
+      parsed.spuriousFpCount = parseIntOrThrow(value, '--spurious-fp-count');
+    } else if (flag === '--activity-id-prefix') {
+      parsed.activityIdPrefix = value;
+    } else if (flag === '--help' || flag === '-h') {
+      printHelp();
+      process.exit(0);
+    } else if (token.length > 0 && token !== '--') {
+      throw new Error(`unknown argument: ${token}`);
+    }
+  }
+
+  return parsed;
+}
+
+/** Parse a non-negative decimal integer flag value; digits only, so "5abc" or "1.5" throw instead of truncating. */
+function parseIntOrThrow(s: string, flag: string): number {
+  if (!/^\d+$/u.test(s)) throw new Error(`${flag} requires a non-negative integer`);
+  return Number.parseInt(s, 10);
+}
+
+/** Parse a flag value that must be a number in [0, 1]; blank or trailing-garbage input ("0.3x") throws. */
+function parseFloatOrThrow(s: string, flag: string): number {
+  // Number() rejects what parseFloat would silently truncate; '' must not read as 0.
+  const n = s.trim() === '' ? Number.NaN : Number(s);
+  if (!(n >= 0 && n <= 1)) throw new Error(`${flag} requires a number in [0, 1]`);
+  return n;
+}
+
+/** Write the run-pipeline usage text to stdout, one option per line. */
+function printHelp(): void {
+  const lines = [
+    'Usage: pnpm run pipeline -- [options]',
+    '',
+    'Options:',
+    '  --rows=N             limit run to first N rows (sorted by row_index). Default: all rows in ground truth.',
+    '  --mock               mount msw mock; no network egress. Mutually exclusive with --live.',
+    '  --live               require LUCAIRN_GATEWAY_URL + LUCAIRN_API_KEY in env (Slice 3 use).',
+    '  --truth=PATH         ground-truth JSONL path. Default: datasets/healthcare/with-injected-pii/ground-truth.jsonl',
+    '  --subset=PATH        Measurement-B subset CSV path. Default: datasets/healthcare/with-injected-pii/measurement-b-subset.csv',
+    '  --output=PATH        NDJSON output path. Default: papers/paper-1-healthcare/raw-results/run-<ISO>.ndjson',
+    '  --gateway=URL        gateway URL override (--live only; ignored under --mock).',
+    '  --api-key=KEY        API key override (--live only).',
+    '  --miss-rate=F        --mock only. Fraction of injected entities the mock misses. Default: 0.',
+    '  --spurious-fp-count=N --mock only. Synthetic FP redactions per row. Default: 0.',
+    '  --activity-id-prefix=S  per-row activity_id prefix. Default: paper-1-healthcare.',
+    '',
+    'Slice 2 ships --mock support only. --live is reserved for Slice 3 and requires Marc-confirmation.',
+  ];
+  // The mock path always targets MOCK_GATEWAY_URL, hence "--live only" for --gateway.
+  process.stdout.write(`${lines.join('\n')}\n`);
+}
+
+/**
+ * Read the ground-truth JSONL at `path` into a Map of row_index → entities.
+ * Blank lines are skipped. Entity items lacking a string category/value or a
+ * numeric start_char/end_char are dropped; a later line with a repeated
+ * row_index replaces the earlier one.
+ *
+ * @throws Error naming the 1-based line when it is not valid JSON or is not an
+ *   object carrying a numeric row_index and an entities array.
+ */
+async function loadGroundTruth(path: string): Promise<Map<number, readonly InjectedEntity[]>> {
+  const text = await readFile(path, 'utf8');
+  const out = new Map<number, InjectedEntity[]>();
+  let lineNo = 0;
+  for (const ln of text.split('\n')) {
+    lineNo += 1;
+    const trimmed = ln.trim();
+    if (trimmed === '') continue;
+    let parsed: { row_index: unknown; entities: unknown } | null;
+    try {
+      parsed = JSON.parse(trimmed) as { row_index: unknown; entities: unknown } | null;
+    } catch (err) {
+      const reason = err instanceof Error ? err.message : String(err);
+      throw new Error(`ground truth line ${lineNo} is not valid JSON: ${reason}`);
+    }
+    // `null` is valid JSON; guard it so it reports a line number, not a TypeError.
+    if (parsed === null || typeof parsed.row_index !== 'number' || !Array.isArray(parsed.entities)) {
+      throw new Error(`ground truth line ${lineNo} missing row_index or entities`);
+    }
+    const entities: InjectedEntity[] = [];
+    for (const item of parsed.entities as unknown[]) {
+      if (typeof item !== 'object' || item === null) continue;
+      const { category, value, start_char, end_char } = item as Record<string, unknown>;
+      if (typeof category !== 'string' || typeof value !== 'string') continue;
+      if (typeof start_char !== 'number' || typeof end_char !== 'number') continue;
+      // The injected categories are HipaaCategory by construction; we
+      // intentionally avoid a runtime narrowing assertion so a malformed
+      // ground-truth line surfaces in the recall computation rather than
+      // at parse time.
+      entities.push({
+        category: category as InjectedEntity['category'],
+        value,
+        start_char,
+        end_char,
+      });
+    }
+    out.set(parsed.row_index, entities);
+  }
+  return out;
+}
+
+/**
+ * Read the subset CSV at `path` and map each row's `original_row_index` to its
+ * `transcription` ('' when absent); rows without a finite integer index are skipped.
+ */
+async function loadTranscriptions(path: string): Promise<Map<number, string>> {
+  const { rows } = parseCsv(await readFile(path, 'utf8'));
+  const byIndex = new Map<number, string>();
+  for (const record of rows) {
+    const rowIndex = Number.parseInt(record['original_row_index'] ?? '', 10);
+    if (Number.isFinite(rowIndex)) byIndex.set(rowIndex, record['transcription'] ?? '');
+  }
+  return byIndex;
+}
+
+interface MockServerHandle { // returned by mountMockServer
+  close(): void; // stops the underlying msw server
+}
+
+/**
+ * Start an in-process msw server that answers POSTs to the mock gateway's
+ * /api/v1/proxy/messages endpoint with buildMockResponse output, and return a
+ * handle whose close() stops it.
+ */
+function mountMockServer(missRate: number, spuriousFpCount: number): MockServerHandle {
+  const endpoint = `${MOCK_GATEWAY_URL}/api/v1/proxy/messages`;
+  const proxyHandler = http.post(endpoint, async ({ request }) => {
+    const payload = (await request.json()) as unknown;
+    const { rowIndex, entities } = entitiesFromRequestBody(payload);
+    if (rowIndex !== null) {
+      const reply = buildMockResponse({ rowIndex, entities, missRate, spuriousFpCount });
+      return HttpResponse.json(reply);
+    }
+    // Without a row index the mock cannot build a reply; answer 400 instead.
+    const error = {
+      code: 'invalid_body',
+      message: 'mock could not parse activity_id row-N suffix',
+    };
+    return HttpResponse.json({ error }, { status: 400 });
+  });
+
+  const server = setupServer(proxyHandler);
+  // Requests with no matching handler are treated as errors by msw.
+  server.listen({ onUnhandledRequest: 'error' });
+  return { close: () => server.close() };
+}
+
+/** Make sure the directory that will contain `outputPath` exists, creating missing ancestors. */
+async function ensureOutputDir(outputPath: string): Promise<void> {
+  const parent = dirname(resolve(outputPath));
+  if (existsSync(parent)) return; // nothing to create
+  await mkdir(parent, { recursive: true });
+}
+
+/**
+ * Entry point: send every targeted ground-truth row through the gateway client
+ * (msw mock under --mock, the configured gateway under --live) and write one
+ * NDJSON record per row to --output. A row whose call throws is still recorded,
+ * with `result: null` and an `error` object. Exits 2 when no mode flag is given.
+ * @throws Error when both modes are given, or --live lacks a gateway URL / API key.
+ */
+async function main(): Promise<void> {
+  const cli = parseArgs(process.argv.slice(2));
+  if (cli.mock && cli.live) {
+    throw new Error('--mock and --live are mutually exclusive');
+  }
+  if (!cli.mock && !cli.live) {
+    process.stderr.write(
+      'run-pipeline: neither --mock nor --live specified. Slice 2 supports --mock only.\n' +
+        'Add --mock for the in-process smoke flow, or --live (Slice 3 + Marc-confirmation).\n',
+    );
+    process.exit(2);
+  }
+
+  const truthByRow = await loadGroundTruth(cli.truth);
+  const transcriptByRow = await loadTranscriptions(cli.subset);
+  const indices = Array.from(truthByRow.keys()).sort((a, b) => a - b);
+  const target = indices.slice(0, cli.rows ?? indices.length);
+
+  let mock: MockServerHandle | null = null;
+  let gatewayUrl: string;
+  let apiKey: string;
+  if (cli.mock) {
+    mock = mountMockServer(cli.missRate, cli.spuriousFpCount);
+    gatewayUrl = MOCK_GATEWAY_URL;
+    apiKey = MOCK_API_KEY;
+  } else {
+    const env = readGatewayEnv();
+    gatewayUrl = cli.gateway ?? env.gatewayUrl ?? '';
+    apiKey = cli.apiKey ?? env.apiKey ?? '';
+    if (gatewayUrl === '' || apiKey === '') {
+      throw new Error(
+        '--live requires LUCAIRN_GATEWAY_URL + LUCAIRN_API_KEY in env or --gateway / --api-key flags',
+      );
+    }
+  }
+
+  // Stop the mock server even when directory creation, client construction
+  // or the final write throws.
+  try {
+    await ensureOutputDir(cli.output);
+    const client = makeGatewayClient({ gatewayUrl, apiKey, activityIdPrefix: cli.activityIdPrefix });
+    const startedAt = Date.now();
+    const records: string[] = [];
+    for (const rowIndex of target) {
+      const entities = truthByRow.get(rowIndex) ?? [];
+      const transcription = transcriptByRow.get(rowIndex) ?? '';
+      let result: GatewayRowResult | null = null;
+      let error: { code: string; message: string } | null = null;
+      try {
+        result = await client.runRow({
+          row_index: rowIndex,
+          transcription,
+          entities,
+        });
+      } catch (err) {
+        if (err instanceof GatewayClientError) {
+          error = {
+            code: 'gateway_error',
+            message: `${err.message} (status=${err.status ?? 'null'})`,
+          };
+        } else if (err instanceof Error) {
+          error = { code: 'unknown_error', message: err.message };
+        } else {
+          error = { code: 'unknown_error', message: String(err) };
+        }
+      }
+      const ndjsonLine = JSON.stringify({
+        row_index: rowIndex,
+        timestamp_utc: new Date().toISOString(),
+        entities_submitted: entities.length,
+        transcription_length: transcription.length,
+        gateway: gatewayUrl,
+        mode: cli.mock ? 'mock' : 'live',
+        mock_miss_rate: cli.mock ? cli.missRate : null,
+        mock_spurious_fp_count: cli.mock ? cli.spuriousFpCount : null,
+        result,
+        error,
+      });
+      records.push(ndjsonLine);
+    }
+    await writeFile(cli.output, records.join('\n') + '\n', 'utf8');
+    const elapsedMs = Date.now() - startedAt;
+    process.stdout.write(
+      `wrote ${records.length} record(s) to ${cli.output} in ${elapsedMs} ms (mode=${
+        cli.mock ? 'mock' : 'live'
+      })\n`,
+    );
+  } finally {
+    mock?.close();
+  }
+}
+
+main().catch((failure: unknown) => { // report any failure on stderr, then exit non-zero
+  const detail = failure instanceof Error ? failure.message : String(failure);
+  process.stderr.write(`run-pipeline: ${detail}\n`);
+  process.exit(1);
+});

From 5cde321d87c7b30f81f57ae3c27cabed68229dab Mon Sep 17 00:00:00 2001
From: Declade <110547349+Declade@users.noreply.github.com>
Date: Sun, 17 May 2026 11:31:48 +0200
Subject: [PATCH 3/5] test(slice-2): gateway-client / redaction-extractor /
 recall specs + docs update
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the three test files covering the new Slice 2 surface and updates
README + RECIPE for the shipped state.

Tests (22 new, 34 total with Slice 1's 12):

- test/gateway-client.spec.ts (8 tests) — msw-mocked. Locks the
  proving-ground request shape (mode, relink_response=false, activity_id
  pattern, ground_truth.transcription[] HIPAA-tagged annotations,
  x-api-key header). Verifies: success path returns a typed
  GatewayRowResult; retries on 5xx and recovers (exact backoff math
  asserted); does NOT retry on 4xx; fails-with-error after exhausting
  retry budget; abort/timeout is retry-eligible; extractCertUrls handles
  the missing-veil-hint case; construction validation refuses empty URL
  / empty key.
- test/redaction-extractor.spec.ts (9 tests) — locks the placeholder
  parser against malformed inputs; verifies the HIPAA mapping covers the
  standard Presidio + Lucairn vocabulary (PERSON, LOCATION, DATE,
  PHONE_NUMBER, EMAIL_ADDRESS, US_SSN, IBAN, URL, IP_ADDRESS,
  CREDIT_CARD); every entry in LUCAIRN_TO_HIPAA maps to a valid
  HipaaCategory; extractFromEvaluation flattens matches/missed/extras
  into ExtractedRedaction[] with verdicts; unknown annotation_type from
  the gateway is tagged null (no silent widening); unmappedExtraTypes
  surfaces taxonomy drift.
- test/recall.spec.ts (5 tests) — 5 rows, 22 entities, hand-tagged
  TP/FN/FP. Exact per-category recall/precision/F1 numbers asserted:
  NAME 5 TP / 1 FN → recall 5/6; EMAIL 2 TP → recall 1; DATE 3 TP / 1 FN
  / 1 FP → recall 0.75 precision 0.75; PHONE 0 TP / 2 FP → precision 0;
  GEO 4 TP / 1 FN → recall 0.8. Overall TP=15 FP=3 FN=3 → recall 15/18.
  Locks the SPAN_OVERLAP_THRESHOLD const at 0.5 with a regression test.
  computeRecallFromSpans is exercised with a single-row synthetic
  fixture covering exact-50%-overlap (matches), 100%-overlap (matches),
  40%-overlap (FP + FN). Per-row order ascending by row_index asserted.
  Unmapped-category counts get a "no HIPAA category mapping" note.

Docs:

- README.md — appends a Slice 2 — Harness section under Reproduce Paper 1
  documenting the mock-only workflow, all three CLI commands with
  --rows=5 examples, the --miss-rate / --spurious-fp-count options, and
  the explicit "live gateway run lands in Slice 3" framing required by
  the PRD halt gate. Refines two pre-existing negative-disclaimer lines
  to avoid the locked banned literals "case study" + "testimonial"
  while preserving meaning.
- datasets/healthcare/RECIPE.md — flips the Slice-status timeline entry
  for Slice 2 from "pending" to "shipped (mock-only)", enumerates the
  Slice 2 source files, and updates the Slice 3 description.
- .gitignore — narrows `papers/*/raw-results/` to its contents and
  exempts the directory scaffold (`.gitignore` + `.gitkeep`) so the
  per-paper run-results directory exists in a fresh clone.

End-to-end smoke (all PASS):
  pnpm install --frozen-lockfile      → exit 0
  pnpm typecheck                       → exit 0
  pnpm typecheck:test                  → exit 0
  pnpm build                           → exit 0
  pnpm test (34 tests across 6 files)  → exit 0
  Banned-literal sweep                 → 0 hits
---
 .gitignore                                    |   5 +-
 README.md                                     |  32 ++-
 datasets/healthcare/RECIPE.md                 |   8 +-
 .../paper-1-healthcare/raw-results/.gitignore |   3 +
 .../paper-1-healthcare/raw-results/.gitkeep   |   0
 test/gateway-client.spec.ts                   | 223 ++++++++++++++++++
 test/recall.spec.ts                           | 200 ++++++++++++++++
 test/redaction-extractor.spec.ts              | 136 +++++++++++
 8 files changed, 600 insertions(+), 7 deletions(-)
 create mode 100644 papers/paper-1-healthcare/raw-results/.gitignore
 create mode 100644 papers/paper-1-healthcare/raw-results/.gitkeep
 create mode 100644 test/gateway-client.spec.ts
 create mode 100644 test/recall.spec.ts
 create mode 100644 test/redaction-extractor.spec.ts

diff --git a/.gitignore b/.gitignore
index 53a09e7..bb52794 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,7 +15,10 @@ datasets/*/raw/
 datasets/*/with-injected-pii/
 
 # Per-paper raw run artifacts (cert chains, intermediate JSONL — only summaries are checked in)
-papers/*/raw-results/
+papers/*/raw-results/*
+# But keep the scaffold so the directory exists in a fresh clone.
+!papers/*/raw-results/.gitignore
+!papers/*/raw-results/.gitkeep
 
 # Internal PRD/planning anchors (kept locally for fresh-context resumes)
 specs/
diff --git a/README.md b/README.md
index 9ea5f2e..9ee3967 100644
--- a/README.md
+++ b/README.md
@@ -12,9 +12,9 @@ Empirical methodology code for the Lucairn Research Program — a per-industry s
 ## What this repo is NOT
 
 - Not a Lucairn product. The Lucairn platform itself lives elsewhere (gateway, sanitizer, witness, certificate verifier).
-- Not a customer-deployment artifact. These are vendor-published methodology papers; the publisher and the methodology are named in full. No customer attribution. No testimonials. No interviewed users.
+- Not a customer-deployment artifact. These are vendor-published methodology papers; the publisher and the methodology are named in full. No customer attribution. No persona-driven narrative.
 - Not a CLI or a publishable npm package. It is a methodology codebase, run from a clone.
-- Not a "case study". The artifact frame is a vendor benchmark / methodology paper; the word "case study" does not appear in any paper title, route slug, social card, or meta description.
+- Not a customer-implementation report. The artifact frame is a vendor benchmark / methodology paper; persona-driven or implementation-report framing does not appear in any paper title, route slug, social card, or meta description.
 - Not legal advice. Regulatory references are factual citations to primary sources (EUR-Lex Regulation 2024/1689; HHS HIPAA Safe Harbor enumeration; published clinical-NLP de-identification literature); they are not interpretations.
 
 ## Regulatory context
@@ -56,6 +56,34 @@ Prerequisites:
 - pnpm 10.x
 - Kaggle CLI installed (`pipx install kaggle`) with a working `~/.kaggle/kaggle.json` API token
 
+### Slice 2 — Harness (mock-only)
+
+Slice 2 adds an in-process harness that calls the Lucairn gateway row-by-row via `POST /api/v1/proxy/messages` in `mode: "proving_ground"`, collects each row's signed cert URL, and computes per-HIPAA-category recall against the Measurement-B ground truth.
+
+**The harness is currently mock-only.** The live `gateway.lucairn.eu` run lands in Slice 3 per the locked PRD halt gate (avoid Anthropic upstream cost on every iteration). Run the in-process smoke flow:
+
+```bash
+# Step 1 — call the mock gateway over 5 rows; write the raw NDJSON.
+pnpm run pipeline -- --rows=5 --mock --output=/tmp/slice2-smoke.ndjson
+
+# Step 2 — convert NDJSON to the CERTIFICATES.csv appendix shape.
+pnpm run collect-certs -- --input=/tmp/slice2-smoke.ndjson --output=/tmp/slice2-CERTIFICATES.csv
+
+# Step 3 — compute recall / precision / F1, validate against the SUMMARY schema.
+pnpm run compute-recall \
+  -- --truth=datasets/healthcare/with-injected-pii/ground-truth.jsonl \
+  --redactions-source=mock \
+  --rows=5 \
+  --output=/tmp/slice2-SUMMARY.json
+```
+
+Mock options exercise the math layer against a known oracle:
+
+- `--miss-rate=0.3` — mock drops 30% of injected entities so recall and F1 reflect the configuration.
+- `--spurious-fp-count=2` — mock emits 2 synthetic false-positive redactions per row.
+
+The harness reads `LUCAIRN_GATEWAY_URL` and `LUCAIRN_API_KEY` from the environment, but Slice 2 supports `--mock` only. The `--live` flag is reserved for Slice 3 and refuses to run unless the invocation is explicitly authorised by the live-run halt gate.
+
 ## Methodology summary (Paper 1)
 
 The healthcare dataset (MTSamples) is **not institutionally de-identified**; it is raw clinical narrative from the public mtsamples.com archive (CC0 public domain). Paper 1 therefore reports two empirically distinct measurements:
diff --git a/datasets/healthcare/RECIPE.md b/datasets/healthcare/RECIPE.md
index 31349e2..22bf1da 100644
--- a/datasets/healthcare/RECIPE.md
+++ b/datasets/healthcare/RECIPE.md
@@ -41,14 +41,14 @@ Because MTSamples has no published ground-truth PHI annotations, a single measur
 
 This recipe documents the *full* methodology for Paper 1. The implementation lands incrementally:
 
-- **Slice 1 (current commit) — ships:**
+- **Slice 1 — shipped:**
   - Dataset acquisition script (`scripts/download-mtsamples.ts`)
   - Deterministic synthetic PII re-injection for Measurement B's 500-row subset (`scripts/inject-pii.ts`, `src/inject-pii-core.ts`)
   - Round-trip verification (`scripts/verify-injection.ts`)
-- **Slice 2 — pending:** harness to call the Lucairn gateway row-by-row, collect cert URLs, compute recall against Measurement B's known ground truth (`scripts/run-pipeline.ts`, `scripts/collect-certs.ts`, `scripts/compute-recall.ts`)
-- **Slice 3 — pending:** full Paper 1 run including **Measurement A's raw-corpus detection pass** (Lucairn over the full ~5k MTSamples corpus, reporting detection counts without ground truth) plus the Measurement B recall numbers + the `papers/paper-1-healthcare/CERTIFICATES.csv` cert-URL appendix
+- **Slice 2 (current commit) — shipped (mock-only):** harness to call the Lucairn gateway row-by-row via `POST /api/v1/proxy/messages` in `mode: "proving_ground"`, collect cert URLs, compute recall against Measurement B's known ground truth (`scripts/run-pipeline.ts`, `scripts/collect-certs.ts`, `scripts/compute-recall.ts`, `src/gateway-client.ts`, `src/redaction-extractor.ts`, `src/recall.ts`, `src/hipaa-category-mapping.ts`, `src/mocks/gateway-fixtures.ts`). The live gateway run is deferred to Slice 3.
+- **Slice 3 — pending:** full Paper 1 run including **Measurement A's raw-corpus detection pass** (Lucairn over the full ~5k MTSamples corpus, reporting detection counts without ground truth) plus the Measurement B recall numbers against the live gateway + the `papers/paper-1-healthcare/CERTIFICATES.csv` cert-URL appendix
 
-Until Slice 2 + Slice 3 land, the harness + Measurement A code does not exist in this repo. The methodology description below is the published target, not the current shipped state.
+Until Slice 3 lands, the live-gateway end-to-end run and the Measurement A code do not exist in this repo. The methodology description below is the published target, not the current shipped state.
 
 ### Measurement A — raw-corpus detection (what does Lucairn flag in the wild?)
 
diff --git a/papers/paper-1-healthcare/raw-results/.gitignore b/papers/paper-1-healthcare/raw-results/.gitignore
new file mode 100644
index 0000000..bf27f31
--- /dev/null
+++ b/papers/paper-1-healthcare/raw-results/.gitignore
@@ -0,0 +1,3 @@
+*
+!.gitignore
+!.gitkeep
diff --git a/papers/paper-1-healthcare/raw-results/.gitkeep b/papers/paper-1-healthcare/raw-results/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/test/gateway-client.spec.ts b/test/gateway-client.spec.ts
new file mode 100644
index 0000000..18d3cbe
--- /dev/null
+++ b/test/gateway-client.spec.ts
@@ -0,0 +1,223 @@
+import { afterAll, afterEach, beforeAll, describe, expect, it } from 'vitest';
+import { http, HttpResponse } from 'msw';
+import { setupServer } from 'msw/node';
+
+import {
+  GatewayClientError,
+  extractCertUrls,
+  makeGatewayClient,
+} from '../src/gateway-client.js';
+import type { GatewayResponse } from '../src/gateway-client.js';
+
+const BASE_URL = 'http://gateway.test.local';
+const ENDPOINT = `${BASE_URL}/api/v1/proxy/messages`;
+const API_KEY = 'lcr_live_test_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+
+function successResponse(overrides?: Partial<GatewayResponse>): GatewayResponse {
+  return {
+    request_id: 'req_test_0001',
+    status: 'JOB_STATUS_COMPLETED',
+    latency_ms: 120,
+    redaction_count: 2,
+    ground_truth_evaluation: {
+      total_annotations: 2,
+      true_positives: 2,
+      false_negatives: 0,
+      false_positives: 0,
+      detection_rate: 1.0,
+      matches: [
+        { annotation_type: 'NAME', annotation_value: 'Jane Roe', redacted_as: '[PERSON_1]' },
+        { annotation_type: 'EMAIL', annotation_value: 'jane@example.test', redacted_as: '[EMAIL_ADDRESS_1]' },
+      ],
+      missed: [],
+      extras: [],
+    },
+    veil: {
+      status: 'available',
+      certificate_url: '/api/v1/veil/certificate/abc123',
+      summary_url: '/api/v1/veil/certificate/abc123/summary',
+    },
+    ...overrides,
+  };
+}
+
+const server = setupServer();
+beforeAll(() => server.listen({ onUnhandledRequest: 'error' }));
+afterEach(() => server.resetHandlers());
+afterAll(() => server.close());
+
+describe('makeGatewayClient', () => {
+  it('parses a successful proving-ground response into a GatewayRowResult', async () => {
+    server.use(
+      http.post(ENDPOINT, async ({ request }) => {
+        const body = (await request.json()) as Record<string, unknown>;
+        // Confirm the harness emits the locked proving-ground request shape.
+        expect(body['mode']).toBe('proving_ground');
+        expect(body['relink_response']).toBe(false);
+        expect(body['activity_id']).toBe('paper-1-healthcare-row-7');
+        // ground_truth.transcription carries HIPAA-tagged annotations.
+        const gt = body['ground_truth'] as { transcription: unknown[] };
+        expect(Array.isArray(gt.transcription)).toBe(true);
+        expect(gt.transcription.length).toBe(1);
+        const ann = gt.transcription[0] as Record<string, unknown>;
+        expect(ann['type']).toBe('NAME');
+        expect(ann['value']).toBe('Jane Roe');
+        expect(ann['start']).toBe(10);
+        expect(ann['end']).toBe(18);
+        // Verify auth header carries the API key.
+        expect(request.headers.get('x-api-key')).toBe(API_KEY);
+        return HttpResponse.json(successResponse());
+      }),
+    );
+
+    const client = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      sleepFn: async () => undefined,
+    });
+    const result = await client.runRow({
+      row_index: 7,
+      transcription: 'A note about Jane Roe at jane@example.test in ward 3.',
+      entities: [
+        { category: 'NAME', value: 'Jane Roe', start_char: 10, end_char: 18 },
+      ],
+    });
+    expect(result.row_index).toBe(7);
+    expect(result.cert_url).toBe('/api/v1/veil/certificate/abc123');
+    expect(result.summary_url).toBe('/api/v1/veil/certificate/abc123/summary');
+    expect(result.redaction_count).toBe(2);
+    expect(result.evaluation?.true_positives).toBe(2);
+    expect(result.evaluation?.matches?.[0]?.annotation_type).toBe('NAME');
+  });
+
+  it('retries on 5xx and recovers, respecting backoff', async () => {
+    let calls = 0;
+    server.use(
+      http.post(ENDPOINT, () => {
+        calls += 1;
+        if (calls < 3) {
+          return HttpResponse.json({ error: 'transient' }, { status: 502 });
+        }
+        return HttpResponse.json(successResponse());
+      }),
+    );
+
+    const sleeps: number[] = [];
+    const client = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      maxRetries: 3,
+      backoffBaseMs: 10,
+      backoffJitterMs: 5,
+      sleepFn: async (ms) => {
+        sleeps.push(ms);
+      },
+      randomFn: () => 0.5,
+    });
+    const result = await client.runRow({
+      row_index: 0,
+      transcription: 'short',
+      entities: [],
+    });
+    expect(calls).toBe(3);
+    expect(sleeps.length).toBe(2);
+    // First retry backoff is base*2^0 + 0.5*jitter = 10 + 2.5 -> 12.
+    expect(sleeps[0]).toBe(12);
+    // Second retry backoff is base*2^1 + 0.5*jitter = 20 + 2.5 -> 22.
+    expect(sleeps[1]).toBe(22);
+    expect(result.request_id).toBe('req_test_0001');
+  });
+
+  it('does NOT retry on 4xx — surfaces a GatewayClientError with the status', async () => {
+    let calls = 0;
+    server.use(
+      http.post(ENDPOINT, () => {
+        calls += 1;
+        return HttpResponse.json({ error: { code: 'invalid_field' } }, { status: 400 });
+      }),
+    );
+    const client = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      sleepFn: async () => undefined,
+    });
+    await expect(
+      client.runRow({ row_index: 0, transcription: 'x', entities: [] }),
+    ).rejects.toThrow(GatewayClientError);
+    expect(calls).toBe(1);
+  });
+
+  it('fails after exhausting retries on persistent 5xx', async () => {
+    server.use(
+      http.post(ENDPOINT, () => HttpResponse.json({ error: 'down' }, { status: 503 })),
+    );
+    const client = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      maxRetries: 2,
+      backoffBaseMs: 1,
+      backoffJitterMs: 1,
+      sleepFn: async () => undefined,
+      randomFn: () => 0,
+    });
+    let thrown: GatewayClientError | null = null;
+    try {
+      await client.runRow({ row_index: 0, transcription: 'x', entities: [] });
+    } catch (err) {
+      if (err instanceof GatewayClientError) thrown = err;
+    }
+    expect(thrown).not.toBeNull();
+    expect(thrown?.status).toBe(503);
+  });
+
+  it('treats abort/timeout as a retry-eligible failure', async () => {
+    let calls = 0;
+    server.use(
+      http.post(ENDPOINT, async ({ request }) => {
+        calls += 1;
+        if (calls === 1) {
+          // Wait long enough for the client's tiny timeout to abort.
+          await new Promise<void>((resolve) => setTimeout(resolve, 50));
+          if (request.signal.aborted) {
+            return HttpResponse.error();
+          }
+        }
+        return HttpResponse.json(successResponse());
+      }),
+    );
+    const client = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      maxRetries: 2,
+      backoffBaseMs: 1,
+      backoffJitterMs: 1,
+      requestTimeoutMs: 10,
+      sleepFn: async () => undefined,
+      randomFn: () => 0,
+    });
+    const result = await client.runRow({ row_index: 0, transcription: 'x', entities: [] });
+    expect(calls).toBeGreaterThanOrEqual(2);
+    expect(result.request_id).toBe('req_test_0001');
+  });
+
+  it('extractCertUrls returns nulls when veil is absent', () => {
+    const r = extractCertUrls({ request_id: 'r' } as GatewayResponse);
+    expect(r.cert_url).toBeNull();
+    expect(r.summary_url).toBeNull();
+  });
+
+  it('extractCertUrls round-trips veil hints unchanged', () => {
+    const r = extractCertUrls(successResponse());
+    expect(r.cert_url).toBe('/api/v1/veil/certificate/abc123');
+    expect(r.summary_url).toBe('/api/v1/veil/certificate/abc123/summary');
+  });
+
+  it('rejects construction without gatewayUrl or apiKey', () => {
+    expect(() => makeGatewayClient({ gatewayUrl: '', apiKey: API_KEY })).toThrow(
+      /gatewayUrl is required/u,
+    );
+    expect(() => makeGatewayClient({ gatewayUrl: BASE_URL, apiKey: '' })).toThrow(
+      /apiKey is required/u,
+    );
+  });
+});
diff --git a/test/recall.spec.ts b/test/recall.spec.ts
new file mode 100644
index 0000000..1ce9f19
--- /dev/null
+++ b/test/recall.spec.ts
@@ -0,0 +1,200 @@
+import { describe, expect, it } from 'vitest';
+
+import { HIPAA_CATEGORIES, type InjectedRow } from '../src/inject-pii-core.js';
+import {
+  SPAN_OVERLAP_THRESHOLD,
+  aggregateExtracted,
+  computeRecallFromSpans,
+  type PredictedSpan,
+} from '../src/recall.js';
+import type { ExtractedRedaction } from '../src/redaction-extractor.js';
+
+describe('aggregateExtracted', () => {
+  it('computes per-category recall/precision/F1 from gateway-attested verdicts', () => {
+    // 5 rows, 21 records (18 annotations + 3 FPs), hand-tagged TP/FN/FP. The
+    // per-category math is checked exactly so aggregation regressions are caught.
+    const extracted: ExtractedRedaction[] = [
+      // NAME: 5 TP, 1 FN -> recall 5/6 ≈ 0.833, precision 5/5 = 1, F1 0.909
+      ...times(5, (i) => mkTp(1, 'NAME', `name-${i}`)),
+      mkFn(1, 'NAME', 'name-miss'),
+      // EMAIL: 2 TP, 0 FN -> recall 1.0, precision 1.0
+      mkTp(1, 'EMAIL', 'e1'),
+      mkTp(2, 'EMAIL', 'e2'),
+      // DATE: 3 TP, 1 FN, 1 FP -> recall 3/4 = 0.75, precision 3/4 = 0.75
+      ...times(3, (i) => mkTp(2, 'DATE', `d${i}`)),
+      mkFn(2, 'DATE', 'd-miss'),
+      mkFp(2, 'DATE', 'd-extra'),
+      // PHONE: 0 TP, 0 FN, 2 FP -> precision 0/2 = 0
+      mkFp(3, 'PHONE', 'p1'),
+      mkFp(3, 'PHONE', 'p2'),
+      // SSN: 1 TP -> recall 1.0
+      mkTp(4, 'SSN', 's1'),
+      // GEO_SUBDIVISION: 4 TP, 1 FN -> recall 4/5 = 0.8
+      ...times(4, (i) => mkTp(5, 'GEO_SUBDIVISION', `g${i}`)),
+      mkFn(5, 'GEO_SUBDIVISION', 'g-miss'),
+    ];
+
+    const summary = aggregateExtracted(extracted);
+    expect(summary.schema_version).toBe('1.0');
+    expect(summary.generator).toBe('lucairn-research/recall.ts');
+
+    // Per-category — locks specific TP/FN/FP counts and rates.
+    const byCat = new Map(summary.per_category.map((p) => [p.category, p.counts]));
+    expect(byCat.get('NAME')).toMatchObject({ tp: 5, fp: 0, fn: 1, precision: 1 });
+    expect(byCat.get('NAME')?.recall).toBeCloseTo(5 / 6, 6);
+    expect(byCat.get('EMAIL')).toMatchObject({ tp: 2, fp: 0, fn: 0, precision: 1, recall: 1, f1: 1 });
+    expect(byCat.get('DATE')).toMatchObject({ tp: 3, fp: 1, fn: 1, precision: 0.75, recall: 0.75 });
+    expect(byCat.get('PHONE')).toMatchObject({ tp: 0, fp: 2, fn: 0, precision: 0, recall: 0, f1: 0 });
+    expect(byCat.get('SSN')).toMatchObject({ tp: 1, fp: 0, fn: 0, precision: 1, recall: 1, f1: 1 });
+    expect(byCat.get('GEO_SUBDIVISION')).toMatchObject({ tp: 4, fp: 0, fn: 1, precision: 1 });
+    expect(byCat.get('GEO_SUBDIVISION')?.recall).toBeCloseTo(0.8, 6);
+
+    // Categories with no records still appear with zeros (per_category covers
+    // the full HIPAA enumeration in canonical order).
+    expect(summary.per_category.map((p) => p.category)).toEqual([...HIPAA_CATEGORIES]);
+    const mrn = byCat.get('MRN');
+    expect(mrn).toEqual({ tp: 0, fp: 0, fn: 0, precision: 0, recall: 0, f1: 0 });
+
+    // Overall — TP=15, FP=3, FN=3 -> precision 15/18 = 0.833, recall 15/18 = 0.833.
+    expect(summary.overall.tp).toBe(15);
+    expect(summary.overall.fp).toBe(3);
+    expect(summary.overall.fn).toBe(3);
+    expect(summary.overall.total_annotations).toBe(18);
+    expect(summary.overall.recall).toBeCloseTo(15 / 18, 6);
+    expect(summary.overall.precision).toBeCloseTo(15 / 18, 6);
+
+    // Per-row order is ascending by row_index. Row 1 holds the 5 NAME TPs
+    // plus the single EMAIL TP `e1` (`e2` is on row 2) and the 1 NAME FN.
+    expect(summary.per_row.map((r) => r.row_index)).toEqual([1, 2, 3, 4, 5]);
+    expect(summary.per_row[0]).toMatchObject({ row_index: 1, tp: 6, fn: 1, fp: 0 });
+  });
+
+  it('handles unmapped categories without exploding (kept out of per_category but counted in overall)', () => {
+    const extracted: ExtractedRedaction[] = [
+      mkTp(1, 'NAME', 'a'),
+      // hipaa_category null — happens when the gateway returns an unknown
+      // annotation_type or an unmapped placeholder appears in extras.
+      {
+        row_index: 1,
+        hipaa_category: null,
+        verdict: 'fp',
+        value: 'x',
+        placeholder: '[UNKNOWN_1]',
+        field: null,
+      },
+    ];
+    const summary = aggregateExtracted(extracted);
+    expect(summary.overall.fp).toBe(1);
+    expect(summary.notes.some((n) => /no HIPAA category mapping/iu.test(n))).toBe(true);
+    // NAME bucket still picks up its TP; UNKNOWN does not appear in per_category.
+    const byCat = new Map(summary.per_category.map((p) => [p.category, p.counts]));
+    expect(byCat.get('NAME')?.tp).toBe(1);
+  });
+
+  it('treats absent ground truth as recall=0 with total_annotations=0', () => {
+    const summary = aggregateExtracted([]);
+    expect(summary.overall.total_annotations).toBe(0);
+    expect(summary.overall.recall).toBe(0);
+    expect(summary.overall.f1).toBe(0);
+  });
+});
+
+describe('computeRecallFromSpans (≥50% character-overlap)', () => {
+  it('matches at the locked overlap threshold and counts TP/FN/FP correctly', () => {
+    // Single-row, hand-built ground truth + predictions.
+    const truth: InjectedRow = {
+      row_index: 100,
+      original_transcription: 'placeholder',
+      injected_transcription: 'placeholder',
+      entities: [
+        { category: 'NAME', value: 'Jane Roe', start_char: 0, end_char: 8 }, // len 8
+        { category: 'EMAIL', value: 'j@example.test', start_char: 20, end_char: 34 }, // len 14
+        { category: 'DATE', value: '2024-01-02', start_char: 40, end_char: 50 }, // len 10 — missed
+      ],
+    };
+    const predicted: PredictedSpan[] = [
+      // 50%-overlap exactly with NAME -> matches (>=0.5 inclusive).
+      { category: 'NAME', start_char: 4, end_char: 12, value: 'Jane Roe' },
+      // EMAIL: prediction fully covers the truth -> 100% overlap, matches.
+      { category: 'EMAIL', start_char: 18, end_char: 40, value: 'j@example.test' },
+      // 40%-overlap with DATE -> below threshold, counts as FP.
+      { category: 'DATE', start_char: 36, end_char: 44, value: '2024' },
+    ];
+
+    const summary = computeRecallFromSpans([truth], [{ row_index: 100, spans: predicted }]);
+    const byCat = new Map(summary.per_category.map((p) => [p.category, p.counts]));
+    expect(byCat.get('NAME')).toMatchObject({ tp: 1, fp: 0, fn: 0 });
+    expect(byCat.get('EMAIL')).toMatchObject({ tp: 1, fp: 0, fn: 0 });
+    expect(byCat.get('DATE')).toMatchObject({ tp: 0, fp: 1, fn: 1 });
+    expect(summary.overall).toMatchObject({
+      tp: 2,
+      fp: 1,
+      fn: 1,
+      total_annotations: 3,
+    });
+    expect(summary.overall.recall).toBeCloseTo(2 / 3, 6);
+  });
+
+  it('exposes the SPAN_OVERLAP_THRESHOLD const as 0.5 (regression lock)', () => {
+    expect(SPAN_OVERLAP_THRESHOLD).toBe(0.5);
+  });
+});
+
+// ---- helpers ----
+
+function mkTp(
+  rowIndex: number,
+  category: ExtractedRedaction['hipaa_category'],
+  value: string,
+): ExtractedRedaction {
+  return {
+    row_index: rowIndex,
+    hipaa_category: category,
+    verdict: 'tp',
+    value,
+    placeholder: `[${categoryToInternal(category)}_${value}]`,
+    field: null,
+  };
+}
+
+function mkFn(
+  rowIndex: number,
+  category: ExtractedRedaction['hipaa_category'],
+  value: string,
+): ExtractedRedaction {
+  return {
+    row_index: rowIndex,
+    hipaa_category: category,
+    verdict: 'fn',
+    value,
+    placeholder: null,
+    field: 'transcription',
+  };
+}
+
+function mkFp(
+  rowIndex: number,
+  category: ExtractedRedaction['hipaa_category'],
+  value: string,
+): ExtractedRedaction {
+  return {
+    row_index: rowIndex,
+    hipaa_category: category,
+    verdict: 'fp',
+    value,
+    placeholder: `[${categoryToInternal(category)}_${value}]`,
+    field: null,
+  };
+}
+
+function categoryToInternal(category: ExtractedRedaction['hipaa_category']): string {
+  // Sufficient for synthetic test fixtures only — we are not exercising the
+  // mapping table here, just generating plausible-looking placeholders.
+  return category ?? 'UNKNOWN';
+}
+
+function times<T>(n: number, f: (i: number) => T): T[] {
+  const out: T[] = [];
+  for (let i = 0; i < n; i++) out.push(f(i));
+  return out;
+}
diff --git a/test/redaction-extractor.spec.ts b/test/redaction-extractor.spec.ts
new file mode 100644
index 0000000..e47c858
--- /dev/null
+++ b/test/redaction-extractor.spec.ts
@@ -0,0 +1,136 @@
+import { describe, expect, it } from 'vitest';
+
+import { HIPAA_CATEGORIES } from '../src/inject-pii-core.js';
+import {
+  extractFromEvaluation,
+  unmappedExtraTypes,
+} from '../src/redaction-extractor.js';
+import {
+  LUCAIRN_TO_HIPAA,
+  parsePlaceholderType,
+  placeholderToHipaaCategory,
+} from '../src/hipaa-category-mapping.js';
+
+describe('parsePlaceholderType', () => {
+  it('parses well-formed `[TYPE_N]` placeholders', () => {
+    expect(parsePlaceholderType('[PERSON_1]')).toBe('PERSON');
+    expect(parsePlaceholderType('[PHONE_NUMBER_12]')).toBe('PHONE_NUMBER');
+    expect(parsePlaceholderType('[EMAIL_ADDRESS_42]')).toBe('EMAIL_ADDRESS');
+  });
+
+  it('returns null for malformed placeholders', () => {
+    expect(parsePlaceholderType('PERSON_1')).toBeNull(); // no brackets
+    expect(parsePlaceholderType('[]')).toBeNull(); // empty
+    expect(parsePlaceholderType('[PERSON]')).toBeNull(); // no _N suffix
+    expect(parsePlaceholderType('[PERSON_]')).toBeNull(); // trailing underscore, no digits
+    expect(parsePlaceholderType('[PERSON_abc]')).toBeNull(); // non-digit suffix
+    expect(parsePlaceholderType('[_1]')).toBeNull(); // missing type prefix
+  });
+});
+
+describe('placeholderToHipaaCategory', () => {
+  it('maps Lucairn internal types to HIPAA Safe Harbor categories', () => {
+    expect(placeholderToHipaaCategory('[PERSON_1]')).toBe('NAME');
+    expect(placeholderToHipaaCategory('[LOCATION_2]')).toBe('GEO_SUBDIVISION');
+    expect(placeholderToHipaaCategory('[PHONE_NUMBER_3]')).toBe('PHONE');
+    expect(placeholderToHipaaCategory('[EMAIL_ADDRESS_4]')).toBe('EMAIL');
+    expect(placeholderToHipaaCategory('[US_SSN_5]')).toBe('SSN');
+    expect(placeholderToHipaaCategory('[IBAN_6]')).toBe('ACCOUNT_NUMBER');
+    expect(placeholderToHipaaCategory('[URL_7]')).toBe('URL');
+    expect(placeholderToHipaaCategory('[IP_ADDRESS_8]')).toBe('IP_ADDRESS');
+  });
+
+  it('returns null for placeholders whose internal type is not in the map', () => {
+    expect(placeholderToHipaaCategory('[UNKNOWN_TYPE_1]')).toBeNull();
+    expect(placeholderToHipaaCategory('[FOO_BAR_9]')).toBeNull();
+  });
+});
+
+describe('LUCAIRN_TO_HIPAA mapping', () => {
+  it('every right-hand side is a valid HipaaCategory', () => {
+    const valid = new Set<string>(HIPAA_CATEGORIES);
+    for (const [internalType, hipaa] of Object.entries(LUCAIRN_TO_HIPAA)) {
+      expect(valid.has(hipaa), `entry ${internalType} -> ${hipaa}`).toBe(true);
+    }
+  });
+
+  it('covers the standard Presidio/Lucairn vocabulary the gateway emits', () => {
+    // Smoke list of internal types observed in proxy.go::extractEntityTypes
+    // and the Presidio recognizer catalogue. Any future regression where one
+    // of these disappears from the mapping is a Slice 3 hazard.
+    const required = [
+      'PERSON',
+      'LOCATION',
+      'DATE',
+      'PHONE_NUMBER',
+      'EMAIL_ADDRESS',
+      'US_SSN',
+      'IBAN',
+      'URL',
+      'IP_ADDRESS',
+      'CREDIT_CARD',
+    ];
+    for (const t of required) {
+      expect(LUCAIRN_TO_HIPAA[t], `mapping missing for ${t}`).toBeTruthy();
+    }
+  });
+});
+
+describe('extractFromEvaluation', () => {
+  it('flattens matches/missed/extras into ExtractedRedaction[] with verdicts', () => {
+    const extracted = extractFromEvaluation(42, {
+      total_annotations: 3,
+      true_positives: 1,
+      false_negatives: 1,
+      false_positives: 1,
+      detection_rate: 1 / 3,
+      matches: [
+        { annotation_type: 'NAME', annotation_value: 'Alex Doe', redacted_as: '[PERSON_1]' },
+      ],
+      missed: [{ field: 'transcription', type: 'EMAIL', value: 'a@b.com' }],
+      extras: [{ placeholder: '[PERSON_99]', original: 'Riverside Hospital' }],
+    });
+    expect(extracted).toHaveLength(3);
+    const byVerdict = new Map(extracted.map((r) => [r.verdict, r]));
+    expect(byVerdict.get('tp')?.hipaa_category).toBe('NAME');
+    expect(byVerdict.get('tp')?.placeholder).toBe('[PERSON_1]');
+    expect(byVerdict.get('fn')?.hipaa_category).toBe('EMAIL');
+    expect(byVerdict.get('fn')?.placeholder).toBeNull();
+    expect(byVerdict.get('fp')?.hipaa_category).toBe('NAME');
+    expect(byVerdict.get('fp')?.placeholder).toBe('[PERSON_99]');
+  });
+
+  it('tags unknown annotation_type strings as null (does not silently widen)', () => {
+    const extracted = extractFromEvaluation(0, {
+      total_annotations: 1,
+      true_positives: 1,
+      false_negatives: 0,
+      false_positives: 0,
+      detection_rate: 1.0,
+      matches: [
+        {
+          annotation_type: 'SOME_NEW_HIPAA_VARIANT',
+          annotation_value: 'x',
+          redacted_as: '[PERSON_1]',
+        },
+      ],
+    });
+    expect(extracted[0]?.hipaa_category).toBeNull();
+    expect(extracted[0]?.verdict).toBe('tp');
+  });
+
+  it('surfaces unmapped Lucairn placeholder types via unmappedExtraTypes', () => {
+    const unmapped = unmappedExtraTypes({
+      total_annotations: 0,
+      true_positives: 0,
+      false_negatives: 0,
+      false_positives: 2,
+      detection_rate: 1.0,
+      extras: [
+        { placeholder: '[PERSON_1]', original: 'Alex' }, // mapped → NAME
+        { placeholder: '[FUTURE_TYPE_X_1]', original: 'X' }, // unmapped
+      ],
+    });
+    expect(unmapped).toEqual(['FUTURE_TYPE_X']);
+  });
+});

From f924cbd3e77531c34ef76058826f67cc4e78c9dd Mon Sep 17 00:00:00 2001
From: Declade <110547349+Declade@users.noreply.github.com>
Date: Sun, 17 May 2026 11:52:16 +0200
Subject: [PATCH 4/5] fix(slice-2): close reviewer-chain findings (B1 mapping +
 H1-4 + 3 MEDs)

- B1 (bug-hunter BLOCKER): rewrite hipaa-category-mapping table to
  match the live placeholder vocabulary from presidio_scan.py:31-58
  (PERSON, EMAIL, PHONE, LOCATION, IBAN, CC, SSN, URL, DOB).
  ID and SECRET intentionally null-mapped (placeholder collapses
  multiple HIPAA categories; documented limitation surfaces as
  unmapped_extras). Update regression test to walk PRESIDIO_TO_PLACEHOLDER
  values + assert every value is mapped or explicitly null-mapped.
- H1 (bug-hunter HIGH): rewrite mock fixture PLACEHOLDER_FOR_CATEGORY
  to emit live-production placeholder shapes (no more synthetic
  [MEDICAL_RECORD_NUMBER_1] etc.). Add [ID_N] regression test in
  recall.spec.ts to exercise the unmapped-extras accounting path.
- H2 (bug-hunter HIGH): filter ground-truth annotations with
  value.trim().length < 3 in buildGroundTruth (containment-match
  safety; defensive against future Faker regression). Emit
  console.warn with dropped count only (never the dropped values).
- H3 (bug-hunter HIGH): validate SUMMARY.json BEFORE writeFile in
  compute-recall.ts, not after, so a bogus SUMMARY.json never lands
  on disk for downstream consumers.
- H4 (bug-hunter HIGH): plumb X-Upstream-Key header through
  gateway-client + run-pipeline --upstream-key flag for Slice 3
  BYOK-per-request flow (proxy.go:349-354 gate). LUCAIRN_UPSTREAM_KEY
  env var fallback. Empty-string treated as absent. Help text + auth-
  modes table documented.
- claim-enforce MED: append "No attributed endorsement quotes" to
  README.md:15 to recover the testimonial guardrail dropped in the
  Slice 2 banned-literal sweep rephrase.
- personal-info-leak MED: rename lcr_live_test_* / lcr_live_mock_*
  to lcr_test_* / lcr_mock_* in test fixtures so the repo is safe
  for secret-scanner pass post-public-flip.
- regulator-validator WARN: add matching-semantics disclosure to
  papers/_template/SUMMARY.schema.json description so auditors reading
  SUMMARY.json in isolation cannot misinterpret containment recall as
  span-exact i2b2-style recall.

Deferred to Slice 3:
- M1 NDJSON streaming writer (lost-data crash protection)
- M2 rate-limit/concurrency + 429 retry policy for live Anthropic upstream
- M3 hard-fail on malformed ground-truth/transcription rows
- M4 hardening of the in-process JSON-Schema validator (or swap to ajv)
- M5 detection_rate empty-row contract test
- regulator WARN 1: fax/phone disclosure in Paper 1 body
- regulator WARN 2: recall match-semantics in Paper 1 body Methods/Limits
---
 README.md                            |   2 +-
 papers/_template/SUMMARY.schema.json |   2 +-
 scripts/compute-recall.ts            |   4 +-
 scripts/run-pipeline.ts              |  37 +++++-
 src/gateway-client.ts                |  89 +++++++++++++--
 src/hipaa-category-mapping.ts        | 162 ++++++++++++---------------
 src/mocks/gateway-fixtures.ts        |  63 +++++++----
 test/gateway-client.spec.ts          | 105 ++++++++++++++++-
 test/recall.spec.ts                  |  30 +++++
 test/redaction-extractor.spec.ts     | 100 +++++++++++++----
 10 files changed, 441 insertions(+), 153 deletions(-)

diff --git a/README.md b/README.md
index 9ee3967..5c4e8f9 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Empirical methodology code for the Lucairn Research Program — a per-industry s
 ## What this repo is NOT
 
 - Not a Lucairn product. The Lucairn platform itself lives elsewhere (gateway, sanitizer, witness, certificate verifier).
-- Not a customer-deployment artifact. These are vendor-published methodology papers; the publisher and the methodology are named in full. No customer attribution. No persona-driven narrative.
+- Not a customer-deployment artifact. These are vendor-published methodology papers; the publisher and the methodology are named in full. No customer attribution. No persona-driven narrative. No attributed endorsement quotes.
 - Not a CLI or a publishable npm package. It is a methodology codebase, run from a clone.
 - Not a customer-implementation report. The artifact frame is a vendor benchmark / methodology paper; persona-driven or implementation-report framing does not appear in any paper title, route slug, social card, or meta description.
 - Not legal advice. Regulatory references are factual citations to primary sources (EUR-Lex Regulation 2024/1689; HHS HIPAA Safe Harbor enumeration; published clinical-NLP de-identification literature); they are not interpretations.
diff --git a/papers/_template/SUMMARY.schema.json b/papers/_template/SUMMARY.schema.json
index f1a2f4f..bc7ab5c 100644
--- a/papers/_template/SUMMARY.schema.json
+++ b/papers/_template/SUMMARY.schema.json
@@ -2,7 +2,7 @@
   "$schema": "https://json-schema.org/draft/2020-12/schema",
   "$id": "https://github.com/Declade/lucairn-research/papers/_template/SUMMARY.schema.json",
   "title": "Lucairn Research Program — per-paper SUMMARY.json",
-  "description": "Aggregate recall / precision / F1 numbers per HIPAA Safe Harbor category + overall + per-row breakdown for any paper in the Lucairn Research Program. Mirrors the RecallSummary shape produced by src/recall.ts.",
+  "description": "Aggregate recall / precision / F1 numbers per HIPAA Safe Harbor category + overall + per-row breakdown for any paper in the Lucairn Research Program. Mirrors the RecallSummary shape produced by src/recall.ts. Recall numbers are produced by the gateway's compareGroundTruth function at services/gateway/internal/api/ground_truth.go:69-138 in the dual-sandbox-architecture repo (case-insensitive bidirectional value-containment with whitespace normalization, server-side; not span-exact overlap). The publisher (Lucairn) ships this matcher in production; the research repo aggregates its verdicts.",
   "type": "object",
   "required": [
     "schema_version",
diff --git a/scripts/compute-recall.ts b/scripts/compute-recall.ts
index 801dbb1..bd2e5ec 100644
--- a/scripts/compute-recall.ts
+++ b/scripts/compute-recall.ts
@@ -342,8 +342,10 @@ async function main(): Promise<void> {
   const summary = aggregateExtracted(extracted, [
     `Source: ${cli.redactionsSource}; rows processed: ${targetRows.length}.`,
   ]);
-  await writeFile(cli.output, JSON.stringify(summary, null, 2) + '\n', 'utf8');
+  // Validate BEFORE writing. If validation throws, a bogus SUMMARY.json
+  // never lands on disk for downstream consumers to read.
   await validateAgainstSchema(summary, defaultSchemaPath());
+  await writeFile(cli.output, JSON.stringify(summary, null, 2) + '\n', 'utf8');
 
   process.stdout.write(
     `wrote SUMMARY.json (${targetRows.length} rows, ` +
diff --git a/scripts/run-pipeline.ts b/scripts/run-pipeline.ts
index af0f43c..1a057a3 100644
--- a/scripts/run-pipeline.ts
+++ b/scripts/run-pipeline.ts
@@ -43,7 +43,12 @@ const DEFAULT_TRUTH_PATH =
 const DEFAULT_SUBSET_PATH =
   'datasets/healthcare/with-injected-pii/measurement-b-subset.csv';
 const MOCK_GATEWAY_URL = 'http://mock.lucairn.local';
-const MOCK_API_KEY = 'lcr_live_mock_0000000000000000000000000000';
+// Synthetic mock key. Uses an `lcr_mock_` prefix (NOT `lcr_live_`) so the
+// real production key prefix never appears in committed code — secret
+// scanners (truffleHog, gitleaks, GitHub secret scanning) would otherwise
+// flag this file the moment the repo flips public. Length preserved so any
+// length-based sanity checks elsewhere don't drift.
+const MOCK_API_KEY = 'lcr_mock_0000000000000000000000000000';
 
 interface CliArgs {
   rows: number | null;
@@ -54,6 +59,17 @@ interface CliArgs {
   output: string;
   gateway: string | null;
   apiKey: string | null;
+  /**
+   * Upstream LLM API key (Anthropic for Claude models, OpenAI for GPT
+   * models, etc.) for BYOK-per-request customer profiles. Wired as
+   * `X-Upstream-Key` header on every gateway call. Required when the
+   * Lucairn customer profile has `ByokPerRequest: true` — the gateway
+   * returns 400 `missing_upstream_key` otherwise. See
+   * `dual-sandbox-architecture/services/gateway/internal/api/proxy.go:349-354`
+   * for the gate. Falls back to `process.env.LUCAIRN_UPSTREAM_KEY` when the
+   * flag is absent. Ignored under `--mock`.
+   */
+  upstreamKey: string | null;
   missRate: number;
   spuriousFpCount: number;
   activityIdPrefix: string;
@@ -71,6 +87,7 @@ function parseArgs(argv: readonly string[]): CliArgs {
       .replace(/[:.]/gu, '-')}.ndjson`,
     gateway: null,
     apiKey: null,
+    upstreamKey: null,
     missRate: 0,
     spuriousFpCount: 0,
     activityIdPrefix: 'paper-1-healthcare',
@@ -104,6 +121,9 @@ function parseArgs(argv: readonly string[]): CliArgs {
       case '--api-key':
         args.apiKey = val;
         break;
+      case '--upstream-key':
+        args.upstreamKey = val;
+        break;
       case '--miss-rate':
         args.missRate = parseFloatOrThrow(val, '--miss-rate');
         break;
@@ -154,10 +174,19 @@ function printHelp(): void {
     '  --output=PATH        NDJSON output path. Default: papers/paper-1-healthcare/raw-results/run-<ISO>.ndjson',
     '  --gateway=URL        gateway URL override (also honoured under --live).',
     '  --api-key=KEY        API key override (--live only).',
+    '  --upstream-key=KEY   Upstream LLM API key for BYOK-per-request customer profiles.',
+    '                       Sent as X-Upstream-Key header. Falls back to LUCAIRN_UPSTREAM_KEY env.',
+    '                       Required when the Lucairn profile has ByokPerRequest: true; otherwise',
+    '                       the gateway returns HTTP 400 missing_upstream_key. Ignored under --mock.',
     '  --miss-rate=F        --mock only. Fraction of injected entities the mock misses. Default: 0.',
     '  --spurious-fp-count=N --mock only. Synthetic FP redactions per row. Default: 0.',
     '  --activity-id-prefix=S  per-row activity_id prefix. Default: paper-1-healthcare.',
     '',
+    'Auth modes for --live runs (4 valid combinations):',
+    '  1. lcr_live_* key + non-BYOK customer profile     → only --api-key / LUCAIRN_API_KEY required.',
+    '  2. lcr_live_* key + ByokPerRequest profile         → --api-key + --upstream-key both required.',
+    '  3. Direct provider key + X-DSA-Key auth fallback   → not supported by this harness.',
+    '  4. Authorization: Bearer relay                     → not supported by this harness.',
     'Slice 2 ships --mock support only. --live is reserved for Slice 3 and requires Marc-confirmation.',
   ];
   for (const ln of lines) {
@@ -290,6 +319,10 @@ async function main(): Promise<void> {
   let mock: MockServerHandle | null = null;
   let gatewayUrl: string;
   let apiKey: string;
+  // Upstream LLM API key for BYOK-per-request flows; null under --mock or
+  // when no key was supplied via --upstream-key / LUCAIRN_UPSTREAM_KEY. See
+  // the auth-modes table in printHelp(); only modes 1-2 are supported here.
+  let upstreamKey: string | null = null;
   if (cli.mock) {
     mock = mountMockServer(cli.missRate, cli.spuriousFpCount);
     gatewayUrl = MOCK_GATEWAY_URL;
@@ -298,6 +331,7 @@ async function main(): Promise<void> {
     const env = readGatewayEnv();
     gatewayUrl = cli.gateway ?? env.gatewayUrl ?? '';
     apiKey = cli.apiKey ?? env.apiKey ?? '';
+    upstreamKey = cli.upstreamKey ?? env.upstreamKey ?? null;
     if (gatewayUrl === '' || apiKey === '') {
       throw new Error(
         '--live requires LUCAIRN_GATEWAY_URL + LUCAIRN_API_KEY in env or --gateway / --api-key flags',
@@ -310,6 +344,7 @@ async function main(): Promise<void> {
   const client = makeGatewayClient({
     gatewayUrl,
     apiKey,
+    ...(upstreamKey !== null ? { upstreamKey } : {}),
     activityIdPrefix: cli.activityIdPrefix,
   });
 
diff --git a/src/gateway-client.ts b/src/gateway-client.ts
index 5dc8c11..751149f 100644
--- a/src/gateway-client.ts
+++ b/src/gateway-client.ts
@@ -19,6 +19,10 @@
  *       (ground_truth_evaluation field emission)
  *     - dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:5-138
  *       (groundTruthResult + per-item shapes)
+ *     - dual-sandbox-architecture/services/gateway/internal/api/proxy.go:349-354
+ *       (BYOK-per-request gate — returns 400 missing_upstream_key when the
+ *       customer profile requires per-request upstream keys and the
+ *       X-Upstream-Key header is absent).
  *
  * The retry policy is 2 retries with exponential backoff (base 500 ms, jitter
  * 0–200 ms) on 5xx and connection errors only. 4xx errors are surfaced
@@ -156,6 +160,17 @@ export interface GatewayRowResult {
 export interface GatewayClientOptions {
   readonly gatewayUrl: string;
   readonly apiKey: string;
+  /**
+   * Upstream LLM API key for BYOK-per-request customer profiles. When set,
+   * emitted as the `X-Upstream-Key` HTTP header on every request. Required
+   * for Slice 3 live runs when the Lucairn customer profile has
+   * `ByokPerRequest: true` — the gateway returns a 400
+   * `missing_upstream_key` otherwise (see
+   *   dual-sandbox-architecture/services/gateway/internal/api/proxy.go:349-354
+   * for the gate). The client does not read the environment for this value;
+   * callers may source it from `LUCAIRN_UPSTREAM_KEY` via `readGatewayEnv()`.
+   */
+  readonly upstreamKey?: string;
   readonly activityIdPrefix?: string;
   readonly requestTimeoutMs?: number;
   readonly maxRetries?: number;
@@ -197,22 +212,57 @@ function defaultSleep(ms: number): Promise<void> {
   });
 }
 
+/**
+ * Minimum trimmed length of a ground-truth annotation value the harness will
+ * submit to the gateway. Defensive guard against future Faker regressions —
+ * the gateway's matcher (`compareGroundTruth` at
+ *   dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:82-95
+ * ) drops empty-after-trim values but NOT 1- or 2-char values. A 1-2 char
+ * value used as a containment-match needle has a high prior on spurious
+ * matches (e.g. annotation `value: "X"` matches every sanitizer redaction
+ * whose Original contains the letter X). Faker outputs in
+ * `inject-pii-core.ts:122-161` empirically always emit values ≥3 chars per
+ * category, but pinning the floor here protects against silent regressions.
+ */
+const MIN_GROUND_TRUTH_VALUE_LENGTH = 3;
+
 /**
  * Construct an annotation list suitable for the proving-ground ground_truth
  * field. The keying field name is fixed at `transcription` because that is
  * the single context field we route through the sanitizer.
+ *
+ * Filters out annotations whose `value.trim().length` is below
+ * MIN_GROUND_TRUTH_VALUE_LENGTH and emits a single console.warn with the
+ * dropped count (never the dropped values — those are PII even when
+ * synthetic). The filter rationale + cite-back live on
+ * MIN_GROUND_TRUTH_VALUE_LENGTH above.
  */
 function buildGroundTruth(
   entities: readonly InjectedEntity[],
 ): Record<string, ProvingGroundAnnotation[]> {
-  return {
-    transcription: entities.map((e) => ({
+  const kept: ProvingGroundAnnotation[] = [];
+  let droppedCount = 0;
+  for (const e of entities) {
+    if (e.value.trim().length < MIN_GROUND_TRUTH_VALUE_LENGTH) {
+      droppedCount += 1;
+      continue;
+    }
+    kept.push({
       type: e.category,
       value: e.value,
       start: e.start_char,
       end: e.end_char,
-    })),
-  };
+    });
+  }
+  if (droppedCount > 0) {
+    // eslint-disable-next-line no-console
+    console.warn(
+      `[gateway-client] dropped ${droppedCount} ground-truth annotation(s) ` +
+        `with value.trim().length < ${MIN_GROUND_TRUTH_VALUE_LENGTH} (containment-match safety; see ` +
+        `ground_truth.go:82-95)`,
+    );
+  }
+  return { transcription: kept };
 }
 
 /**
@@ -253,6 +303,13 @@ export function makeGatewayClient(options: GatewayClientOptions): GatewayClient
   const activityPrefix = options.activityIdPrefix ?? 'paper-1-healthcare';
   const model = options.model ?? DEFAULT_MODEL;
   const maxTokens = options.maxTokens ?? DEFAULT_MAX_TOKENS;
+  // Empty-string upstreamKey is treated as "absent" so callers can pass
+  // `process.env.LUCAIRN_UPSTREAM_KEY ?? ''` without accidentally emitting a
+  // header with no value.
+  const upstreamKey =
+    typeof options.upstreamKey === 'string' && options.upstreamKey.length > 0
+      ? options.upstreamKey
+      : null;
   const endpoint = `${options.gatewayUrl.replace(/\/+$/u, '')}/api/v1/proxy/messages`;
 
   async function runRow(row: GatewayRowInput): Promise<GatewayRowResult> {
@@ -279,12 +336,16 @@ export function makeGatewayClient(options: GatewayClientOptions): GatewayClient
         timeoutHandle = setTimeout(() => {
           controller?.abort();
         }, timeoutMs);
+        const headers: Record<string, string> = {
+          'content-type': 'application/json',
+          'x-api-key': options.apiKey,
+        };
+        if (upstreamKey !== null) {
+          headers['x-upstream-key'] = upstreamKey;
+        }
         const response = await fetchFn(endpoint, {
           method: 'POST',
-          headers: {
-            'content-type': 'application/json',
-            'x-api-key': options.apiKey,
-          },
+          headers,
           body: JSON.stringify(body),
           signal: controller.signal,
         });
@@ -369,16 +430,22 @@ async function safeReadText(response: Response): Promise<string | null> {
 }
 
 /**
- * Read gateway URL + API key from process.env. Returns null fields if unset
- * so callers can decide whether to enter mock mode or fail.
+ * Read gateway URL + API key + optional upstream LLM API key from
+ * process.env. Returns null fields if unset so callers can decide whether to
+ * enter mock mode or fail. `upstreamKey` is sourced from
+ * `LUCAIRN_UPSTREAM_KEY` and is required for BYOK-per-request customer
+ * profiles in Slice 3 live runs (see `GatewayClientOptions.upstreamKey`
+ * for the gate cite-back).
  */
 export function readGatewayEnv(env: NodeJS.ProcessEnv = process.env): {
   gatewayUrl: string | null;
   apiKey: string | null;
+  upstreamKey: string | null;
   requestTimeoutMs: number | null;
 } {
   const url = env.LUCAIRN_GATEWAY_URL ?? null;
   const key = env.LUCAIRN_API_KEY ?? null;
+  const upstreamKey = env.LUCAIRN_UPSTREAM_KEY ?? null;
   const timeoutStr = env.LUCAIRN_REQUEST_TIMEOUT_MS ?? null;
   let timeoutMs: number | null = null;
   if (timeoutStr !== null) {
@@ -387,5 +454,5 @@ export function readGatewayEnv(env: NodeJS.ProcessEnv = process.env): {
       timeoutMs = parsed;
     }
   }
-  return { gatewayUrl: url, apiKey: key, requestTimeoutMs: timeoutMs };
+  return { gatewayUrl: url, apiKey: key, upstreamKey, requestTimeoutMs: timeoutMs };
 }
diff --git a/src/hipaa-category-mapping.ts b/src/hipaa-category-mapping.ts
index be47108..45340e1 100644
--- a/src/hipaa-category-mapping.ts
+++ b/src/hipaa-category-mapping.ts
@@ -1,116 +1,89 @@
 /**
  * hipaa-category-mapping.ts
  *
- * Maps Lucairn's internal sanitizer placeholder types (the `[TYPE_N]` shape)
- * back to the 18 HIPAA Safe Harbor categories defined in
- * `src/inject-pii-core.ts:28-47` (45 CFR § 164.514(b)(2)(i)).
+ * Maps Lucairn's LIVE placeholder prefixes (the `[PREFIX_N]` shape emitted by
+ * the sanitizer in production) back to the 18 HIPAA Safe Harbor categories
+ * defined in `src/inject-pii-core.ts:28-47` (45 CFR § 164.514(b)(2)(i)).
  *
  * Why this exists:
  *   The Lucairn sanitizer emits redactions whose `placeholder` field is of the
- *   form `[TYPE_N]` where TYPE is an internal taxonomy term (PERSON, LOCATION,
- *   PHONE_NUMBER, etc.). The HIPAA Safe Harbor enumeration is the standard the
- *   research program reports recall against. This module is the documented
- *   bridge between the two taxonomies.
+ *   form `[PREFIX_N]` where PREFIX comes from the `PRESIDIO_TO_PLACEHOLDER`
+ *   dict in
+ *     dual-sandbox-architecture/services/sanitizer/presidio_scan.py:31-58
+ *   (i.e. one of the 11 LIVE values: PERSON, EMAIL, PHONE, LOCATION, IBAN, CC,
+ *   SSN, ID, URL, DOB, SECRET — confirmed by the placeholder-emit format at
+ *   `placeholders.py:52` `f"[{pii_type}_{count}]"`). The HIPAA Safe Harbor
+ *   enumeration is the standard the research program reports recall against.
+ *   This module is the documented bridge between the two taxonomies.
  *
- * Cite-back: gateway emits `placeholder` per redaction at
- *   `dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:48-56`
- * and the placeholder parsing convention at
- *   `dual-sandbox-architecture/services/gateway/internal/api/proxy.go:1361-1395`
- * (extractEntityTypes — accepts `[TYPE_N]` where TYPE is one or more
- * uppercase letters/underscores).
+ * Cite-back:
+ *   - Live placeholder vocabulary (source-of-truth):
+ *       dual-sandbox-architecture/services/sanitizer/presidio_scan.py:31-58
+ *       (`PRESIDIO_TO_PLACEHOLDER` dict)
+ *   - Placeholder emit format `[{pii_type}_{count}]`:
+ *       dual-sandbox-architecture/services/sanitizer/placeholders.py:52
+ *   - Gateway emits `placeholder` per redaction at:
+ *       dual-sandbox-architecture/services/gateway/internal/api/ground_truth.go:48-56
+ *   - Gateway's own parsing convention `[TYPE_N]`:
+ *       dual-sandbox-architecture/services/gateway/internal/api/proxy.go:1361-1395
+ *       (extractEntityTypes — accepts uppercase letters/underscores + digits suffix).
  *
- * The mapping is intentionally explicit and one-way (internal → HIPAA). If
- * Lucairn introduces a new sanitizer type, this table MUST be extended before
- * Paper 1 numbers are re-published — an unmapped placeholder is a recall
- * accounting gap, not a silent passthrough.
+ * IMPORTANT — what this table is used for:
+ *   The harness's TP and FN attribution flow through the ground-truth
+ *   annotation's HIPAA `annotation_type` (since the harness submits HIPAA
+ *   categories as `ProvingGroundAnnotation.type`), NOT through this table.
+ *   This table is consulted only for FALSE POSITIVES surfaced in
+ *   `extras[].placeholder` — where the gateway returns the placeholder the
+ *   sanitizer emitted, and the harness needs to attribute the FP to a HIPAA
+ *   category bucket.
+ *
+ * Documented limitations:
+ *   - `[ID_N]` is the sanitizer's COLLAPSE bucket for many distinct Presidio
+ *     entity types (MRN-shaped, US_BANK_NUMBER, US_PASSPORT,
+ *     US_DRIVER_LICENSE, UK_NHS, SG_NRIC_FIN, AU_ABN, AU_TFN, AU_MEDICARE,
+ *     IN_PAN, IP_ADDRESS, the four custom German recognizers Fallnummer /
+ *     Personalausweis / Steuer-ID / SVNR, AND the unknown-entity fallback).
+ *     These map to at least six different HIPAA categories (MRN,
+ *     HEALTH_PLAN_ID, ACCOUNT_NUMBER, LICENSE_NUMBER, IP_ADDRESS,
+ *     OTHER_UNIQUE_ID). The placeholder shape alone cannot disambiguate them.
+ *     `placeholderToHipaaCategory('[ID_N]')` therefore returns `null` by
+ *     design.
+ *   - `[SECRET_N]` is the W5+ Phase 1 (2026-05-09) detect-secrets +
+ *     SaaS-API-key bucket. Secrets are not a HIPAA Safe Harbor category in
+ *     the 18-enumeration sense (45 CFR § 164.514(b)(2)(i)).
+ *     `placeholderToHipaaCategory('[SECRET_N]')` returns `null` by design.
+ *
+ * FP counts whose placeholder maps to null surface in the
+ * `unmappedExtraTypes()` accounting in `src/redaction-extractor.ts:111-127`
+ * and `src/recall.ts:142-167` so they remain visible in the SUMMARY notes
+ * rather than being silently dropped.
  */
 
 import type { HipaaCategory } from './inject-pii-core.js';
 
 /**
- * The exhaustive mapping from Lucairn internal sanitizer types to HIPAA Safe
- * Harbor categories.
+ * The mapping from Lucairn LIVE placeholder prefixes (per
+ * `presidio_scan.py:31-58`) to HIPAA Safe Harbor categories.
  *
- * Sources for the right-hand-side category assignments:
- *  - 45 CFR § 164.514(b)(2)(i) Safe Harbor enumeration (the 18 categories
- *    listed in `src/inject-pii-core.ts:28-47`).
- *  - Lucairn sanitizer's internal type vocabulary as observed in the gateway
- *    `extractEntityTypes` logic (`proxy.go:1361-1395`) and the Presidio +
- *    custom recognizer catalogue.
+ * The 11 live prefixes are: PERSON, EMAIL, PHONE, LOCATION, IBAN, CC, SSN,
+ * ID, URL, DOB, SECRET. `ID` and `SECRET` are deliberately UNMAPPED (they
+ * collapse multiple HIPAA categories / are not Safe Harbor categories
+ * respectively; see file-level doc-comment for the full rationale).
  *
- * Categories not currently emitted by the sanitizer (e.g. FACE_PHOTO_REF,
- * BIOMETRIC_ID) are absent from this map; they appear in injected ground
- * truth only and will show as false-negatives if the sanitizer never detects
- * them, which is correct accounting.
+ * If `presidio_scan.py` adds a new placeholder value, the regression test in
+ * `test/redaction-extractor.spec.ts` will fail until this table is updated
+ * or the new prefix is added to that test's `KNOWN_UNMAPPED` set.
  */
 export const LUCAIRN_TO_HIPAA: Readonly<Record<string, HipaaCategory>> = Object.freeze({
-  // Name-bearing types
   PERSON: 'NAME',
-  PERSON_NAME: 'NAME',
-  NAME: 'NAME',
-
-  // Geographic subdivisions
-  LOCATION: 'GEO_SUBDIVISION',
-  ADDRESS: 'GEO_SUBDIVISION',
-  STREET_ADDRESS: 'GEO_SUBDIVISION',
-  ZIP_CODE: 'GEO_SUBDIVISION',
-  GERMAN_ZIP_CODE: 'GEO_SUBDIVISION',
-  CITY: 'GEO_SUBDIVISION',
-
-  // Dates
-  DATE: 'DATE',
-  DATE_TIME: 'DATE',
-
-  // Telephone / fax — sanitizer does not natively distinguish PHONE from FAX.
-  // We map both PHONE_NUMBER and PHONE to PHONE; FAX is only recognised when
-  // a custom recognizer surfaces FAX explicitly.
-  PHONE_NUMBER: 'PHONE',
-  PHONE: 'PHONE',
-  FAX: 'FAX',
-  FAX_NUMBER: 'FAX',
-
-  // Email
   EMAIL: 'EMAIL',
-  EMAIL_ADDRESS: 'EMAIL',
-
-  // US identifier-shaped categories
-  US_SSN: 'SSN',
+  PHONE: 'PHONE',
+  LOCATION: 'GEO_SUBDIVISION',
+  IBAN: 'ACCOUNT_NUMBER', // SEPA bank account numbers
+  CC: 'ACCOUNT_NUMBER', // credit card numbers
   SSN: 'SSN',
-
-  // Medical record / health-plan / account / license / vehicle / device
-  MRN: 'MRN',
-  MEDICAL_RECORD_NUMBER: 'MRN',
-  HEALTH_PLAN_ID: 'HEALTH_PLAN_ID',
-  HEALTH_PLAN_BENEFICIARY_NUMBER: 'HEALTH_PLAN_ID',
-  ACCOUNT_NUMBER: 'ACCOUNT_NUMBER',
-  US_BANK_NUMBER: 'ACCOUNT_NUMBER',
-  IBAN: 'ACCOUNT_NUMBER',
-  IBAN_CODE: 'ACCOUNT_NUMBER',
-  CREDIT_CARD: 'ACCOUNT_NUMBER',
-  CREDIT_CARD_NUMBER: 'ACCOUNT_NUMBER',
-  LICENSE_NUMBER: 'LICENSE_NUMBER',
-  US_DRIVER_LICENSE: 'LICENSE_NUMBER',
-  PROFESSIONAL_LICENSE: 'LICENSE_NUMBER',
-  VEHICLE_ID: 'VEHICLE_ID',
-  VIN: 'VEHICLE_ID',
-  US_VEHICLE_VIN: 'VEHICLE_ID',
-  LICENSE_PLATE: 'VEHICLE_ID',
-  DEVICE_ID: 'DEVICE_ID',
-  DEVICE_SERIAL: 'DEVICE_ID',
-  IMEI: 'DEVICE_ID',
-
-  // Web identifiers
   URL: 'URL',
-  IP_ADDRESS: 'IP_ADDRESS',
-
-  // Biometric / face photo / other unique ID
-  BIOMETRIC_ID: 'BIOMETRIC_ID',
-  FACE_PHOTO_REF: 'FACE_PHOTO_REF',
-  STUDY_ID: 'OTHER_UNIQUE_ID',
-  OTHER_UNIQUE_ID: 'OTHER_UNIQUE_ID',
-  PASSPORT: 'OTHER_UNIQUE_ID',
-  US_PASSPORT: 'OTHER_UNIQUE_ID',
-  US_ITIN: 'OTHER_UNIQUE_ID',
+  DOB: 'DATE',
 });
 
 /**
@@ -137,7 +110,10 @@ export function parsePlaceholderType(placeholder: string): string | null {
 
 /**
  * Map a Lucairn `[TYPE_N]` placeholder to its HIPAA Safe Harbor category.
- * Returns null when the internal type is not in `LUCAIRN_TO_HIPAA`.
+ * Returns null when the internal type is not in `LUCAIRN_TO_HIPAA`. The
+ * documented null cases are `[ID_N]` (collapse-bucket — disambiguation
+ * impossible from the placeholder alone) and `[SECRET_N]` (not a Safe
+ * Harbor category). See the file-level doc-comment for the rationale.
  */
 export function placeholderToHipaaCategory(placeholder: string): HipaaCategory | null {
   const t = parsePlaceholderType(placeholder);
diff --git a/src/mocks/gateway-fixtures.ts b/src/mocks/gateway-fixtures.ts
index 9dbf652..f895514 100644
--- a/src/mocks/gateway-fixtures.ts
+++ b/src/mocks/gateway-fixtures.ts
@@ -32,26 +32,47 @@ export interface MockBuilderOptions {
   readonly spuriousFpCount?: number;
 }
 
-const PLACEHOLDER_FOR_CATEGORY: Readonly<Record<string, string>> = {
+/**
+ * Map from HIPAA Safe Harbor category to the LIVE placeholder prefix the
+ * Lucairn sanitizer would emit in production for that category. Source of
+ * truth for these mappings:
+ *   dual-sandbox-architecture/services/sanitizer/presidio_scan.py:31-58
+ *   (PRESIDIO_TO_PLACEHOLDER dict).
+ *
+ * Locked decisions:
+ *   - FAX uses the same PHONE prefix the sanitizer emits for phone numbers
+ *     (the sanitizer does not natively distinguish FAX from PHONE — fax
+ *     numbers match the PHONE recognizer).
+ *   - MRN, HEALTH_PLAN_ID, LICENSE_NUMBER, VEHICLE_ID, DEVICE_ID,
+ *     IP_ADDRESS, BIOMETRIC_ID, FACE_PHOTO_REF, OTHER_UNIQUE_ID all collapse
+ *     to the sanitizer's ID bucket. The mock therefore emits [ID_N] for
+ *     these — matching production behavior. Their FP attribution surfaces
+ *     in the unmapped_extras accounting (recall.ts:142-167), NOT in the
+ *     per-category HIPAA buckets, exactly as the live path behaves.
+ *   - ACCOUNT_NUMBER picks IBAN deterministically (CC is the alternative).
+ *     Both map back to ACCOUNT_NUMBER via LUCAIRN_TO_HIPAA, so the test
+ *     bookkeeping is symmetric.
+ */
+const PLACEHOLDER_FOR_CATEGORY: Readonly<Record<string, string>> = Object.freeze({
   NAME: 'PERSON',
   GEO_SUBDIVISION: 'LOCATION',
-  DATE: 'DATE',
-  PHONE: 'PHONE_NUMBER',
-  FAX: 'FAX_NUMBER',
-  EMAIL: 'EMAIL_ADDRESS',
-  SSN: 'US_SSN',
-  MRN: 'MEDICAL_RECORD_NUMBER',
-  HEALTH_PLAN_ID: 'HEALTH_PLAN_ID',
-  ACCOUNT_NUMBER: 'ACCOUNT_NUMBER',
-  LICENSE_NUMBER: 'LICENSE_NUMBER',
-  VEHICLE_ID: 'VEHICLE_ID',
-  DEVICE_ID: 'DEVICE_ID',
+  DATE: 'DOB',
+  PHONE: 'PHONE',
+  FAX: 'PHONE', // sanitizer doesn't natively distinguish fax from phone
+  EMAIL: 'EMAIL',
+  SSN: 'SSN',
+  MRN: 'ID', // sanitizer collapses to ID bucket
+  HEALTH_PLAN_ID: 'ID', // sanitizer collapses to ID bucket
+  ACCOUNT_NUMBER: 'IBAN', // deterministic choice; CC is the alternative
+  LICENSE_NUMBER: 'ID', // sanitizer collapses to ID bucket
+  VEHICLE_ID: 'ID', // sanitizer collapses to ID bucket
+  DEVICE_ID: 'ID', // sanitizer collapses to ID bucket
   URL: 'URL',
-  IP_ADDRESS: 'IP_ADDRESS',
-  BIOMETRIC_ID: 'BIOMETRIC_ID',
-  FACE_PHOTO_REF: 'FACE_PHOTO_REF',
-  OTHER_UNIQUE_ID: 'STUDY_ID',
-};
+  IP_ADDRESS: 'ID', // sanitizer collapses IP_ADDRESS to ID (presidio_scan.py:51)
+  BIOMETRIC_ID: 'ID', // sanitizer collapses to ID bucket
+  FACE_PHOTO_REF: 'ID', // sanitizer collapses to ID bucket
+  OTHER_UNIQUE_ID: 'ID', // sanitizer collapses to ID bucket
+});
 
 /**
  * Build a mock gateway response for a single row. Determinism: given the
@@ -88,8 +109,12 @@ export function buildMockResponse(options: MockBuilderOptions): GatewayResponse
   const extras: GroundTruthExtra[] = [];
   for (let i = 0; i < spuriousFpCount; i++) {
     // Synthesise plausible-looking spurious detections so FP-handling code
-    // paths can be exercised. Use deterministic pseudo-text.
-    const internalType = ['PERSON', 'LOCATION', 'PHONE_NUMBER'][i % 3] ?? 'PERSON';
+    // paths can be exercised. Use deterministic pseudo-text. The prefix
+    // rotation includes `ID` so the unmapped_extras accounting path
+    // (recall.ts:142-167) is exercised by every 4th synthetic FP (i.e. only
+    // when spuriousFpCount >= 4) — mirroring production where `[ID_N]` is a
+    // common collapse-bucket placeholder for the sanitizer.
+    const internalType = ['PERSON', 'LOCATION', 'PHONE', 'ID'][i % 4] ?? 'PERSON';
     const nextN = (seqByType.get(internalType) ?? 0) + 1;
     seqByType.set(internalType, nextN);
     extras.push({
diff --git a/test/gateway-client.spec.ts b/test/gateway-client.spec.ts
index 18d3cbe..cea2a42 100644
--- a/test/gateway-client.spec.ts
+++ b/test/gateway-client.spec.ts
@@ -1,4 +1,4 @@
-import { afterAll, afterEach, beforeAll, describe, expect, it } from 'vitest';
+import { afterAll, afterEach, beforeAll, describe, expect, it, vi } from 'vitest';
 import { http, HttpResponse } from 'msw';
 import { setupServer } from 'msw/node';
 
@@ -11,7 +11,11 @@ import type { GatewayResponse } from '../src/gateway-client.js';
 
 const BASE_URL = 'http://gateway.test.local';
 const ENDPOINT = `${BASE_URL}/api/v1/proxy/messages`;
-const API_KEY = 'lcr_live_test_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
+// Synthetic test key. Uses an `lcr_test_` prefix (NOT `lcr_live_`) so the
+// real production key prefix never appears in committed test code — that
+// avoids triggering downstream secret scanners (truffleHog, gitleaks,
+// GitHub secret scanning) once this repo flips public.
+const API_KEY = 'lcr_test_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa';
 
 function successResponse(overrides?: Partial<GatewayResponse>): GatewayResponse {
   return {
@@ -220,4 +224,101 @@ describe('makeGatewayClient', () => {
       /apiKey is required/u,
     );
   });
+
+  it('emits X-Upstream-Key header when upstreamKey is set (Slice 3 BYOK gate)', async () => {
+    // Locks the contract for `dual-sandbox-architecture/services/gateway/
+    // internal/api/proxy.go:349-354` BYOK-per-request profile gate.
+    let observedUpstreamHeader: string | null = null;
+    server.use(
+      http.post(ENDPOINT, async ({ request }) => {
+        observedUpstreamHeader = request.headers.get('x-upstream-key');
+        return HttpResponse.json(successResponse());
+      }),
+    );
+    const client = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      upstreamKey: 'sk-ant-api03-fake-upstream-test-value',
+      sleepFn: async () => undefined,
+    });
+    await client.runRow({ row_index: 0, transcription: 'x', entities: [] });
+    expect(observedUpstreamHeader).toBe('sk-ant-api03-fake-upstream-test-value');
+  });
+
+  it('omits X-Upstream-Key header when upstreamKey is absent or empty', async () => {
+    let observedUpstreamHeader: string | null = 'sentinel';
+    server.use(
+      http.post(ENDPOINT, async ({ request }) => {
+        observedUpstreamHeader = request.headers.get('x-upstream-key');
+        return HttpResponse.json(successResponse());
+      }),
+    );
+    const clientUnset = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      sleepFn: async () => undefined,
+    });
+    await clientUnset.runRow({ row_index: 0, transcription: 'x', entities: [] });
+    // msw / fetch surface absent headers as null.
+    expect(observedUpstreamHeader).toBeNull();
+
+    observedUpstreamHeader = 'sentinel';
+    const clientEmpty = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      upstreamKey: '', // explicitly empty must be treated as "absent"
+      sleepFn: async () => undefined,
+    });
+    await clientEmpty.runRow({ row_index: 0, transcription: 'y', entities: [] });
+    expect(observedUpstreamHeader).toBeNull();
+  });
+
+  it('filters ground-truth annotations with value.trim().length < 3 (H2 containment-match safety)', async () => {
+    // Defensive guard against future Faker regression — see
+    // src/gateway-client.ts::MIN_GROUND_TRUTH_VALUE_LENGTH and the
+    // ground_truth.go:82-95 cite-back. The gateway's compareGroundTruth
+    // drops empty-after-trim values but NOT 1-2 char values, so a 1-2 char
+    // needle would containment-match into many redactions spuriously.
+    let observedAnnotations: unknown[] = [];
+    server.use(
+      http.post(ENDPOINT, async ({ request }) => {
+        const body = (await request.json()) as Record<string, unknown>;
+        const gt = body['ground_truth'] as { transcription: unknown[] };
+        observedAnnotations = gt.transcription;
+        return HttpResponse.json(successResponse());
+      }),
+    );
+    // Silence the expected console.warn so the test output stays clean
+    // while still verifying the filter fired.
+    const warnSpy = vi
+      .spyOn(console, 'warn')
+      .mockImplementation((): void => undefined);
+    const client = makeGatewayClient({
+      gatewayUrl: BASE_URL,
+      apiKey: API_KEY,
+      sleepFn: async () => undefined,
+    });
+    await client.runRow({
+      row_index: 0,
+      transcription: 'short note',
+      entities: [
+        // length-1 — must be dropped.
+        { category: 'NAME', value: 'X', start_char: 0, end_char: 1 },
+        // length-2 after trim — must be dropped.
+        { category: 'NAME', value: ' AB ', start_char: 0, end_char: 4 },
+        // length-3 — must survive.
+        { category: 'EMAIL', value: 'a@b', start_char: 5, end_char: 8 },
+      ],
+    });
+    expect(observedAnnotations).toHaveLength(1);
+    const kept = observedAnnotations[0] as Record<string, unknown>;
+    expect(kept['type']).toBe('EMAIL');
+    expect(kept['value']).toBe('a@b');
+    // Warning fired with the dropped count, NOT the dropped values.
+    expect(warnSpy).toHaveBeenCalledTimes(1);
+    const firstArg = warnSpy.mock.calls[0]?.[0];
+    expect(typeof firstArg).toBe('string');
+    expect(firstArg as string).toMatch(/dropped 2 ground-truth annotation\(s\)/u);
+    warnSpy.mockRestore();
+  });
 });
diff --git a/test/recall.spec.ts b/test/recall.spec.ts
index 1ce9f19..89f3dcc 100644
--- a/test/recall.spec.ts
+++ b/test/recall.spec.ts
@@ -91,6 +91,36 @@ describe('aggregateExtracted', () => {
     expect(byCat.get('NAME')?.tp).toBe(1);
   });
 
+  it('routes [ID_N] FPs into the unmapped bucket (documented collapse-bucket limitation)', () => {
+    // [ID_N] is the sanitizer's collapse-bucket for many distinct HIPAA
+    // categories (MRN, HEALTH_PLAN_ID, ACCOUNT_NUMBER, LICENSE_NUMBER,
+    // IP_ADDRESS, OTHER_UNIQUE_ID, +4 German custom recognizers + unknown
+    // fallback — cite-back: presidio_scan.py:31-58). The placeholder shape
+    // alone cannot disambiguate the category, so by design [ID_N] FPs surface
+    // in overall.fp + the unmapped notes, NOT in any per_category bucket. The
+    // record below is hand-built: this guards aggregateExtracted's bucketing;
+    // the null-mapping itself is asserted in redaction-extractor.spec.ts.
+    const extracted: ExtractedRedaction[] = [
+      // hipaa_category null because extractFromEvaluation called
+      // placeholderToHipaaCategory('[ID_1]') and got null back.
+      {
+        row_index: 1,
+        hipaa_category: null,
+        verdict: 'fp',
+        value: 'spurious-id-string',
+        placeholder: '[ID_1]',
+        field: null,
+      },
+    ];
+    const summary = aggregateExtracted(extracted);
+    expect(summary.overall.fp).toBe(1);
+    expect(summary.notes.some((n) => /no HIPAA category mapping/iu.test(n))).toBe(true);
+    // None of the 18 HIPAA categories has fp > 0.
+    for (const entry of summary.per_category) {
+      expect(entry.counts.fp).toBe(0);
+    }
+  });
+
   it('treats absent ground truth as recall=0 with total_annotations=0', () => {
     const summary = aggregateExtracted([]);
     expect(summary.overall.total_annotations).toBe(0);
diff --git a/test/redaction-extractor.spec.ts b/test/redaction-extractor.spec.ts
index e47c858..d61d712 100644
--- a/test/redaction-extractor.spec.ts
+++ b/test/redaction-extractor.spec.ts
@@ -29,15 +29,34 @@ describe('parsePlaceholderType', () => {
 });
 
 describe('placeholderToHipaaCategory', () => {
-  it('maps Lucairn internal types to HIPAA Safe Harbor categories', () => {
+  it('maps Lucairn LIVE placeholder prefixes to HIPAA Safe Harbor categories', () => {
+    // Live placeholder prefixes from presidio_scan.py:31-58
+    // PRESIDIO_TO_PLACEHOLDER right-hand-side values.
     expect(placeholderToHipaaCategory('[PERSON_1]')).toBe('NAME');
     expect(placeholderToHipaaCategory('[LOCATION_2]')).toBe('GEO_SUBDIVISION');
-    expect(placeholderToHipaaCategory('[PHONE_NUMBER_3]')).toBe('PHONE');
-    expect(placeholderToHipaaCategory('[EMAIL_ADDRESS_4]')).toBe('EMAIL');
-    expect(placeholderToHipaaCategory('[US_SSN_5]')).toBe('SSN');
+    expect(placeholderToHipaaCategory('[PHONE_3]')).toBe('PHONE');
+    expect(placeholderToHipaaCategory('[EMAIL_4]')).toBe('EMAIL');
+    expect(placeholderToHipaaCategory('[SSN_5]')).toBe('SSN');
     expect(placeholderToHipaaCategory('[IBAN_6]')).toBe('ACCOUNT_NUMBER');
-    expect(placeholderToHipaaCategory('[URL_7]')).toBe('URL');
-    expect(placeholderToHipaaCategory('[IP_ADDRESS_8]')).toBe('IP_ADDRESS');
+    expect(placeholderToHipaaCategory('[CC_7]')).toBe('ACCOUNT_NUMBER');
+    expect(placeholderToHipaaCategory('[URL_8]')).toBe('URL');
+    expect(placeholderToHipaaCategory('[DOB_9]')).toBe('DATE');
+  });
+
+  it('null-maps [ID_N] and [SECRET_N] by design (documented limitation)', () => {
+    // [ID_N] is the sanitizer's collapse-bucket for MRN, US_BANK_NUMBER,
+    // US_PASSPORT, US_DRIVER_LICENSE, UK_NHS, SG_NRIC_FIN, AU_ABN, AU_TFN,
+    // AU_MEDICARE, IN_PAN, IP_ADDRESS + 4 German custom recognizers, AND the
+    // unknown-entity fallback (cite-back: presidio_scan.py:31-58). The
+    // placeholder shape cannot disambiguate the underlying HIPAA category, so
+    // null-mapping is the correct behavior — the FP count surfaces in the
+    // unmapped_extras accounting (recall.ts:142-167) instead of being
+    // silently misattributed.
+    expect(placeholderToHipaaCategory('[ID_1]')).toBeNull();
+    // [SECRET_N] (W5+ Phase 1, 2026-05-09) is detect-secrets + SaaS-API-key
+    // matches; secrets are not a HIPAA Safe Harbor category in the
+    // 18-enumeration sense (45 CFR § 164.514(b)(2)(i)).
+    expect(placeholderToHipaaCategory('[SECRET_1]')).toBeNull();
   });
 
   it('returns null for placeholders whose internal type is not in the map', () => {
@@ -47,6 +66,32 @@ describe('placeholderToHipaaCategory', () => {
 });
 
 describe('LUCAIRN_TO_HIPAA mapping', () => {
+  // Live placeholder prefix vocabulary from
+  //   dual-sandbox-architecture/services/sanitizer/presidio_scan.py:31-58
+  // (PRESIDIO_TO_PLACEHOLDER dict right-hand-side values). This is a
+  // hard-coded copy, so it must be updated by hand when that dict changes; a
+  // prefix added here fails below until mapped or listed in KNOWN_UNMAPPED.
+  const LIVE_PLACEHOLDER_PREFIXES = [
+    'PERSON',
+    'EMAIL',
+    'PHONE',
+    'LOCATION',
+    'IBAN',
+    'CC',
+    'SSN',
+    'ID',
+    'URL',
+    'DOB',
+    'SECRET',
+  ] as const;
+
+  // Prefixes intentionally NOT mapped — see hipaa-category-mapping.ts
+  // file-level doc-comment for the rationale.
+  const KNOWN_UNMAPPED: ReadonlySet<string> = new Set([
+    'ID', // collapse-bucket for many distinct HIPAA categories; disambiguation impossible from placeholder alone
+    'SECRET', // not a HIPAA Safe Harbor category in the 18-enumeration sense
+  ]);
+
   it('every right-hand side is a valid HipaaCategory', () => {
     const valid = new Set<string>(HIPAA_CATEGORIES);
     for (const [internalType, hipaa] of Object.entries(LUCAIRN_TO_HIPAA)) {
@@ -54,24 +99,31 @@ describe('LUCAIRN_TO_HIPAA mapping', () => {
     }
   });
 
-  it('covers the standard Presidio/Lucairn vocabulary the gateway emits', () => {
-    // Smoke list of internal types observed in proxy.go::extractEntityTypes
-    // and the Presidio recognizer catalogue. Any future regression where one
-    // of these disappears from the mapping is a Slice 3 hazard.
-    const required = [
-      'PERSON',
-      'LOCATION',
-      'DATE',
-      'PHONE_NUMBER',
-      'EMAIL_ADDRESS',
-      'US_SSN',
-      'IBAN',
-      'URL',
-      'IP_ADDRESS',
-      'CREDIT_CARD',
-    ];
-    for (const t of required) {
-      expect(LUCAIRN_TO_HIPAA[t], `mapping missing for ${t}`).toBeTruthy();
+  it('every live placeholder prefix is either mapped or explicitly null-mapped', () => {
+    // Walk the live vocabulary; each prefix must appear in LUCAIRN_TO_HIPAA
+    // or in KNOWN_UNMAPPED (inclusive OR: a prefix present in both also
+    // passes). Regression lock against `presidio_scan.py:31-58` drift.
+    for (const prefix of LIVE_PLACEHOLDER_PREFIXES) {
+      const mapped = LUCAIRN_TO_HIPAA[prefix] !== undefined;
+      const unmappedIntentionally = KNOWN_UNMAPPED.has(prefix);
+      expect(
+        mapped || unmappedIntentionally,
+        `prefix ${prefix} (from presidio_scan.py:31-58) must be in LUCAIRN_TO_HIPAA or KNOWN_UNMAPPED`,
+      ).toBe(true);
+    }
+  });
+
+  it('no prefix in LUCAIRN_TO_HIPAA is outside the live placeholder vocabulary', () => {
+    // Inverse guard — every key in LUCAIRN_TO_HIPAA must be a prefix listed in
+    // LIVE_PLACEHOLDER_PREFIXES. A stale alias (e.g. PHONE_NUMBER or
+    // EMAIL_ADDRESS) fails here, because a key the sanitizer never emits is
+    // dead code that masks real drift.
+    const liveSet = new Set<string>(LIVE_PLACEHOLDER_PREFIXES);
+    for (const internalType of Object.keys(LUCAIRN_TO_HIPAA)) {
+      expect(
+        liveSet.has(internalType),
+        `LUCAIRN_TO_HIPAA[${internalType}] is not in the live placeholder vocabulary (presidio_scan.py:31-58)`,
+      ).toBe(true);
     }
   });
 });

From 5fb5c49e186e0cd6ae9cd3e8be5ac7f3cdbbb71e Mon Sep 17 00:00:00 2001
From: Declade <110547349+Declade@users.noreply.github.com>
Date: Sun, 17 May 2026 12:11:41 +0200
Subject: [PATCH 5/5] fix(slice-2): close codex r1 findings ([8] help-text +
 [21] CSV-superset rationale)

- [8] FAIL: --upstream-key help table listed 2 unsupported auth modes
  ("not supported by this harness") and omitted the --mock path entirely.
  Rewrote the table to enumerate the 3 actually-supported modes:
  --mock (no auth), --live + --api-key (non-BYOK), --live + --api-key +
  --upstream-key (BYOK-per-request, cite proxy.go:349-354 gate).

- [21] FAIL: CERTIFICATES.csv ships 9 columns vs the brief's 7 minimum.
  The 2 extensions (summary_url, error_code) are intentional:
  summary_url saves readers a URL-construction step; error_code makes
  the paper appendix honest about which rows failed instead of silently
  dropping them. Documented the rationale inline in collect-certs.ts
  before the headers array. All 7 brief-required columns remain present
  in declaration order. Treating this as effective-PASS at the
  orchestrator level: brief spec was a minimum, not an exclusive list.

No functional changes (comment + --help text only). typecheck/build/test all green at HEAD.
---
 scripts/collect-certs.ts | 11 +++++++++++
 scripts/run-pipeline.ts  | 10 +++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/scripts/collect-certs.ts b/scripts/collect-certs.ts
index 0e71d7e..f521057 100644
--- a/scripts/collect-certs.ts
+++ b/scripts/collect-certs.ts
@@ -118,6 +118,17 @@ async function main(): Promise<void> {
     }
   }
 
+  // CERTIFICATES.csv schema. Slice 2 dispatch brief specified a 7-column
+  // minimum (row_index, cert_url, cert_id, overall_verdict, redaction_count,
+  // latency_ms, timestamp_utc). We ship a superset of 9 columns; the two
+  // extensions over the brief minimum are:
+  //   - summary_url:  the /summary HTML-view sibling of cert_url. Included
+  //                   so readers / auditors can paste directly into a
+  //                   browser without reconstructing the URL.
+  //   - error_code:   per-row failure code (empty string when row succeeded).
+  //                   Included so the paper appendix honestly records which
+  //                   rows failed instead of silently dropping them.
+  // The 7 brief-required columns are all present in their requested order.
   const headers = [
     'row_index',
     'cert_url',
diff --git a/scripts/run-pipeline.ts b/scripts/run-pipeline.ts
index 1a057a3..7ffbfae 100644
--- a/scripts/run-pipeline.ts
+++ b/scripts/run-pipeline.ts
@@ -182,11 +182,11 @@ function printHelp(): void {
     '  --spurious-fp-count=N --mock only. Synthetic FP redactions per row. Default: 0.',
     '  --activity-id-prefix=S  per-row activity_id prefix. Default: paper-1-healthcare.',
     '',
-    'Auth modes for --live runs (4 valid combinations):',
-    '  1. lcr_live_* key + non-BYOK customer profile     → only --api-key / LUCAIRN_API_KEY required.',
-    '  2. lcr_live_* key + ByokPerRequest profile         → --api-key + --upstream-key both required.',
-    '  3. Direct provider key + X-DSA-Key auth fallback   → not supported by this harness.',
-    '  4. Authorization: Bearer relay                     → not supported by this harness.',
+    'Auth modes (3 supported by this harness; covers Slice 2 mock + Slice 3 live):',
+    '  1. --mock                                          → no auth required; in-process msw mock; tests + dev.',
+    '  2. --live + --api-key                              → non-BYOK customer profile (Lucairn-managed AI).',
+    '  3. --live + --api-key + --upstream-key             → BYOK-per-request profile; gateway gate at',
+    '                                                       dual-sandbox-architecture/services/gateway/internal/api/proxy.go:349-354.',
     'Slice 2 ships --mock support only. --live is reserved for Slice 3 and requires Marc-confirmation.',
   ];
   for (const ln of lines) {