+
{summaryParts.map((part, i) => (
@@ -434,34 +432,6 @@ function ResultsToolbar({ totalCount, searchParams, classifyProgress }) {
)}
-
-
- sort
-
- {SORT_OPTIONS.map(opt => {
- const active = sort === opt.id
- return (
- !opt.disabled && setSort(opt.id)}
- disabled={opt.disabled}
- title={opt.title}
- {...(opt.disabled ? {} : { 'aria-pressed': active })}
- className={[
- 'text-[11px] px-2 py-0.5 rounded-md transition-colors',
- opt.disabled
- ? 'text-parchment-500 cursor-not-allowed'
- : active
- ? 'bg-iris-50 text-iris-700 font-medium'
- : 'text-parchment-700 hover:text-parchment-950 hover:bg-parchment-100',
- ].join(' ')}
- >
- {opt.label}
-
- )
- })}
-
)
}
From 1b3e7598f2d8e52a181fa68a6c808846637da22a Mon Sep 17 00:00:00 2001
From: John Orgera <65687576+johnoooh@users.noreply.github.com>
Date: Thu, 7 May 2026 00:54:49 -0400
Subject: [PATCH 27/31] chore: post-merge follow-ups (matchMedia, regression
test, doc move)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Three of the deferred items from PR #1's review, addressed in one
follow-up to keep the phase-3 PR from carrying open feedback debt:
1. useIsMobile listens for the matchMedia 'change' event instead of
window 'resize' (ResultsList.jsx:36-50). iOS Safari fires 'resize'
inconsistently on rotation; the MediaQueryList 'change' event is the
reliable signal and also catches iPad split-screen + browser-window
mode switches.
src/test/setup.js stubs matchMedia (jsdom doesn't ship it) so
the existing ResultsList tests keep rendering the desktop two-pane
path.
2. New regression test for the queued-submit drain in
NaturalLanguageInput. Indirect coverage for the StrictMode listener
re-attach fix in useNLP — if the listener doesn't reach the worker
'ready' message, the queued submit never fires and this test fails.
The bug surfaced this session: status was getting stuck at
'downloading' forever in dev because the listener, once detached by
StrictMode's first cleanup, was never re-attached. Now: pin the
contract.
3. shared/iris-shared.jsx (478-line design reference, never imported
by src/) moved to docs/design-references-shared/ with a README
explaining its role. Stops future readers (LLM or human) from
trying to "fix" or "consolidate" it as if it were live code.
Contrast-check (#3 in the review): computed iris-700 on parchment-50
yields ~9.6:1 (WCAG AAA). iris-700 on iris-50 is similar (~8.5:1).
All iris-violet links and the model badge clear AA easily; no
palette change needed. Lighthouse can confirm at deploy time.
Compare-state lift (#1 in the review): deferred to its own follow-up
PR alongside the actual compare view (currently a placeholder).
---
docs/design-references-shared/README.md | 17 +++++++++
.../design-references-shared}/iris-shared.jsx | 0
src/components/NaturalLanguageInput.test.jsx | 38 +++++++++++++++++++
src/components/ResultsList.jsx | 14 +++++--
src/test/setup.js | 18 ++++++++-
5 files changed, 82 insertions(+), 5 deletions(-)
create mode 100644 docs/design-references-shared/README.md
rename {shared => docs/design-references-shared}/iris-shared.jsx (100%)
diff --git a/docs/design-references-shared/README.md b/docs/design-references-shared/README.md
new file mode 100644
index 0000000..489ae34
--- /dev/null
+++ b/docs/design-references-shared/README.md
@@ -0,0 +1,17 @@
+# iris-shared.jsx — design reference, not source
+
+Reference implementations from the original Claude.ai design exploration.
+Components in this file (`IrisHeader`, `IrisSearchBar`, `LocalAIBadge`,
+`FitMeter`, `StatusPill`, `ActionRow`, `StreamingText`, …) were ported into
+the live React app under `src/components/` and `src/utils/` — the versions
+here are kept verbatim so a future reader can compare implementations
+against the original prototype.
+
+**Do not import from this file in `src/`.** It runs against a Babel-standalone
+environment in `IRIS Triage.html` and uses inline-style patterns the live
+app intentionally moved away from (the live app uses Tailwind utility
+classes on top of CSS custom properties from `styles/tokens.css`).
+
+If you're trying to "fix" or "consolidate" this file: stop. Edit the live
+component under `src/components/` instead. The existence of this file is
+documentation, not duplication.
diff --git a/shared/iris-shared.jsx b/docs/design-references-shared/iris-shared.jsx
similarity index 100%
rename from shared/iris-shared.jsx
rename to docs/design-references-shared/iris-shared.jsx
diff --git a/src/components/NaturalLanguageInput.test.jsx b/src/components/NaturalLanguageInput.test.jsx
index ca59891..a9fd691 100644
--- a/src/components/NaturalLanguageInput.test.jsx
+++ b/src/components/NaturalLanguageInput.test.jsx
@@ -158,3 +158,41 @@ describe('NaturalLanguageInput — error state', () => {
expect(screen.getByText(/try again/i)).toBeInTheDocument()
})
})
+
+describe('NaturalLanguageInput — queued submit during download', () => {
+ // Locks in the typing-while-loading flow: a user can hit Find trials while
+ // the model is still downloading; the intent is held until status flips to
+ // 'ready' and then auto-fires. Indirect smoke test for the StrictMode
+ // listener fix in useNLP — if the listener didn't re-attach after the dev
+ // double-invoke, the real-world status would never reach 'ready' and the
+ // drain effect (deps [status, pendingSubmit]) would never fire.
+ it('queues submit while downloading, fires extract once status flips to ready', async () => {
+ const extract = vi.fn().mockResolvedValue({
+ condition: 'breast cancer', location: null, age: 58, sex: 'FEMALE',
+ status: 'RECRUITING', phases: [],
+ })
+ useNLP.mockReturnValue({ ...baseHook, status: 'downloading', extract })
+ localStorage.setItem('iris_nlp_enabled', 'true')
+
+ const onExtract = vi.fn()
+ const { rerender } = render(
)
+ fireEvent.click(screen.getByRole('button', { name: /describe in your own words/i }))
+
+ fireEvent.change(screen.getByRole('textbox', { name: /natural language search/i }), {
+ target: { value: '58 with breast cancer' },
+ })
+
+ // Submit while downloading — should queue, NOT fire extract yet.
+ fireEvent.click(screen.getByRole('button', { name: /Run when ready/i }))
+ expect(extract).not.toHaveBeenCalled()
+ expect(screen.getByRole('button', { name: /Queued/i })).toBeInTheDocument()
+
+ // Worker reports ready. In production this comes via the listener that
+ // the StrictMode fix ensures stays attached after the cleanup-remount.
+ useNLP.mockReturnValue({ ...baseHook, status: 'ready', extract })
+ rerender(
)
+
+ await waitFor(() => expect(extract).toHaveBeenCalledWith('58 with breast cancer'))
+ await waitFor(() => expect(onExtract).toHaveBeenCalled())
+ })
+})
diff --git a/src/components/ResultsList.jsx b/src/components/ResultsList.jsx
index 61fda0e..8dfb4bf 100644
--- a/src/components/ResultsList.jsx
+++ b/src/components/ResultsList.jsx
@@ -33,14 +33,20 @@ const EAGER_BATCH_SIZE = 5
const MOBILE_BREAKPOINT_PX = 820
const LIST_WIDTH_PX = 400
+// matchMedia (not 'resize'): iOS Safari fires 'resize' inconsistently on
+// rotation; matchMedia.change is the reliable signal. Also catches iPad
+// split-screen and browser-window mode switches without a manual resize.
function useIsMobile() {
+ const query = `(max-width: ${MOBILE_BREAKPOINT_PX}px)`
const [isMobile, setIsMobile] = useState(() =>
- typeof window !== 'undefined' && window.innerWidth <= MOBILE_BREAKPOINT_PX
+ typeof window !== 'undefined' && window.matchMedia(query).matches
)
useEffect(() => {
- const onResize = () => setIsMobile(window.innerWidth <= MOBILE_BREAKPOINT_PX)
- window.addEventListener('resize', onResize)
- return () => window.removeEventListener('resize', onResize)
+ const mq = window.matchMedia(query)
+ const onChange = (e) => setIsMobile(e.matches)
+ mq.addEventListener('change', onChange)
+ return () => mq.removeEventListener('change', onChange)
+ // eslint-disable-next-line react-hooks/exhaustive-deps
}, [])
return isMobile
}
diff --git a/src/test/setup.js b/src/test/setup.js
index 7c891de..d8865f2 100644
--- a/src/test/setup.js
+++ b/src/test/setup.js
@@ -1,7 +1,23 @@
import '@testing-library/jest-dom'
-import { afterEach } from 'vitest'
+import { afterEach, vi } from 'vitest'
import { cleanup } from '@testing-library/react'
+// jsdom doesn't ship matchMedia; ResultsList uses it for the mobile
+// breakpoint detector. Stub it to "desktop" (does-not-match) by default
+// so the two-pane code path renders in tests.
+if (typeof window !== 'undefined' && !window.matchMedia) {
+ window.matchMedia = vi.fn().mockImplementation((query) => ({
+ matches: false,
+ media: query,
+ onchange: null,
+ addEventListener: vi.fn(),
+ removeEventListener: vi.fn(),
+ addListener: vi.fn(), // legacy
+ removeListener: vi.fn(), // legacy
+ dispatchEvent: vi.fn(),
+ }))
+}
+
afterEach(() => {
cleanup()
})
From d473033a4881f4359a91b9bb6c2398154b9058fa Mon Sep 17 00:00:00 2001
From: John Orgera <65687576+johnoooh@users.noreply.github.com>
Date: Thu, 7 May 2026 00:56:58 -0400
Subject: [PATCH 28/31] fix(phase-3): re-run classification when patient
description changes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
User reported: hitting "Find trials" again with a refined prompt did
nothing visible. The classifier kept its old verdicts; the toolbar
sat at "fit evaluated for N" against a stale patient description.
The classification-reset effect was watching only searchParams. If
the refined prompt extracted to the same condition (e.g., "breast
cancer" → still "breast cancer"), the API result set was cached and
the trials list didn't change either. classifiedRef carried the old
NCT IDs into the next render, so newTrials.length === 0 and the
classification effect short-circuited.
Two changes:
- Add patientDesc to the reset effect's deps so a prompt change
alone (no condition change) wipes classifications + classifiedRef
and cancels the in-flight batch via cancelClassifyRef.
- Cancel the simplifier's pending queue too — otherwise an in-flight
summary keeps the worker busy while the re-classification waits
for it to drain.
Effect: change the prompt → classifications wipe → fit dots reset to
shimmer → toolbar shows "evaluating fit · 0 of N" again → classifier
re-runs against the new patient → fit dots refill.
---
src/components/ResultsList.jsx | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/components/ResultsList.jsx b/src/components/ResultsList.jsx
index 8dfb4bf..2b72a00 100644
--- a/src/components/ResultsList.jsx
+++ b/src/components/ResultsList.jsx
@@ -117,13 +117,23 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
nlp.load(model.id, { isThinking: model.isThinking, chatOpts: model.chatOpts })
}, [canClassify, nlp.status, modelKey, nlp])
- // Reset classification state when the search itself changes.
+ // Reset classification state when EITHER the search params OR the patient
+ // description changes. Including patientDesc handles the case where a user
+ // hits "Find trials" again with a refined prompt that happens to extract
+ // to the same condition: the API result set may be cached (same trials)
+ // but the verdicts are now stale w.r.t. the new patient description, so
+ // classifications + classifiedRef must be wiped and the in-flight batch
+ // cancelled so the next pass re-classifies against the new patient.
+ // Also resets the simplifier so any in-flight summary stops competing
+ // with the re-classification pass.
useEffect(() => {
classifiedRef.current = new Set()
setClassifications(new Map())
setClassifyProgress({ done: 0, total: 0 })
if (cancelClassifyRef.current) cancelClassifyRef.current()
- }, [searchParams])
+ simplifier.cancelPending()
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [searchParams, patientDesc])
function toggleCompare(nctId) {
setCompareSet(prev => {
From 71926e66cb0e3da9e9d05d4d681ed476379663a6 Mon Sep 17 00:00:00 2001
From: John Orgera <65687576+johnoooh@users.noreply.github.com>
Date: Thu, 7 May 2026 01:03:04 -0400
Subject: [PATCH 29/31] fix(phase-3): address PR #2 review must-fixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Two correctness fixes from code review:
1. useClassifier.js:34-49 — listener cleanup now also rejects every
in-flight classify in pendingRef. Previously: on unmount (or
StrictMode dev cleanup) the listener detached but pendingRef Map
still held resolve/reject handles — those promises hung forever.
Now any awaiting caller sees a clean rejection with
"classifier unmounted" so error paths fire and the queue clears.
2. ResultsList.jsx:118 — replaced `nlp` (whole hook return object)
in the load-trigger effect deps with `nlpLoad` destructured. The
useNLP hook doesn't memoize its return object, so each render
produced a new ref → effect ran every render. The status guard
prevented redundant load() calls but the body still ran. Now the
effect only fires when load callback identity changes (it's
useCallback'd with stable deps so essentially never), or when
canClassify / status / modelKey change.
Both surfaced by post-merge code review of PR #2; neither blocks
shipping but both are trivial to land before merge.
---
src/components/ResultsList.jsx | 11 ++++++++---
src/hooks/useClassifier.js | 10 ++++++++++
2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/src/components/ResultsList.jsx b/src/components/ResultsList.jsx
index 2b72a00..ad99d6c 100644
--- a/src/components/ResultsList.jsx
+++ b/src/components/ResultsList.jsx
@@ -109,13 +109,18 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
const canClassify = consented && nlp.webGPUSupported && Boolean(patientDesc)
// Idempotent: worker fast-returns 'ready' if engine already loaded
- // (e.g. NL extraction loaded it earlier this session).
+ // (e.g. NL extraction loaded it earlier this session). Destructure
+ // load() out of nlp so we can list it in deps directly — `nlp` itself
+ // is a fresh object on every render (useNLP doesn't memoize its
+ // return), and listing the whole hook would re-fire the effect on
+ // every render even when nothing relevant changed.
+ const nlpLoad = nlp.load
useEffect(() => {
if (!canClassify) return
if (nlp.status !== 'idle') return
const model = NLP_MODELS[modelKey] ?? NLP_MODELS.gemma
- nlp.load(model.id, { isThinking: model.isThinking, chatOpts: model.chatOpts })
- }, [canClassify, nlp.status, modelKey, nlp])
+ nlpLoad(model.id, { isThinking: model.isThinking, chatOpts: model.chatOpts })
+ }, [canClassify, nlp.status, modelKey, nlpLoad])
// Reset classification state when EITHER the search params OR the patient
// description changes. Including patientDesc handles the case where a user
diff --git a/src/hooks/useClassifier.js b/src/hooks/useClassifier.js
index 52b1ba1..2004aac 100644
--- a/src/hooks/useClassifier.js
+++ b/src/hooks/useClassifier.js
@@ -35,9 +35,19 @@ export function useClassifier() {
}
useEffect(() => {
+ const pending = pendingRef.current
return () => {
detachRef.current?.()
detachRef.current = null
+ // Reject every in-flight classify so awaiting callers don't hang
+ // forever when the component unmounts mid-batch (or during a
+ // StrictMode dev double-invoke). Without this, the listener
+ // detaches but the pendingRef Map still holds resolve/reject
+ // handles whose promise will never settle.
+ for (const { reject } of pending.values()) {
+ reject(new Error('classifier unmounted'))
+ }
+ pending.clear()
}
}, [])
From e3cefa6787a467cffa40f1e655578883187a5577 Mon Sep 17 00:00:00 2001
From: John Orgera <65687576+johnoooh@users.noreply.github.com>
Date: Thu, 7 May 2026 01:13:30 -0400
Subject: [PATCH 30/31] chore(phase-3): post-review cleanup + disable
classify-in-results
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Bundles all eight follow-up items the reviewer flagged on PR #2,
plus the user-requested decision to disable classify-driven UI in
the results view (the fit dots had no actionable consequence
without sort wiring, so they were just decoration).
User-visible
- Classify-in-results gated behind ENABLE_CLASSIFY_IN_RESULTS = false
(ResultsList.jsx). The classifier hook + worker task + ?test=classify
harness all stay live for prompt iteration; only the in-app fit
dots and "evaluating fit · X of N" caption are suppressed. Flip
the constant to true once "Best fit" sort lands so the dots become
actionable.
Refactors / dedupe
- Moved 305-line SAMPLE_TRIALS array + USER_PRESETS list out of
ClassificationHarness.jsx into a sibling fixtures file
(ClassificationHarness.fixtures.js). Harness file shrank from
924 to ~550 lines.
- Harness now imports DEFAULT_CLASSIFY_PROMPT and parseVerdict from
utils/classifyTrial.js instead of duplicating both verbatim. Single
source of truth — prompt tweaks no longer have to be made in two
places.
- useIsMobile hoisted out of ResultsList.jsx into hooks/useIsMobile.js
so any other component that needs the same breakpoint can import
it without copy-pasting.
Small fixes
- patientDesc in ResultsList wrapped in useMemo (was recomputed every
render — value-equality made it work but the implicit reliance
was fragile).
- FitDot in TriageRow folds the model's reason into aria-label so
screen readers and keyboard-focused users get the same context as
a sighted hover (title alone reaches neither group reliably). Same
string in both attrs. Added role="img" since it carries semantic
content now.
New worker task type
- Added 'translate' message type to nlp.worker.js. max_tokens 200
(vs classify's 80) so verbose-language paraphrases fit. Same low
temperature (0.1) since translation wants fidelity, not creativity.
- useClassifier now exposes both classifyOne and translateOne, sharing
the single promise chain (engine is single-threaded regardless of
task type). handleMessage routes done/error events for both via a
single isDone/isError predicate.
- Harness translate-first toggle now uses translateOne instead of
overloading classifyOne — clarifies intent and lets the worker
apply the right max_tokens budget.
New tests
- useClassifier.test.js: three tests covering serialization
(concurrent calls post FIFO), error isolation (one rejection
doesn't poison the queue), and unmount cleanup (pending tasks
reject with 'classifier unmounted'). Mock the shared worker via
vi.mock so the tests don't touch real WebLLM. 197/197 pass.
Skipped from the review
- "Simplifier idle gap during model load" — the gap is intentional
per Handoff Phase 3 step 6 (stage-2 only after stage-1 completes),
and the pipeline-stage caption already addresses the UX. Reviewer's
suggestion to "let the simplifier proceed" would re-introduce the
classifier-vs-simplifier worker contention we explicitly fixed.
---
.../ClassificationHarness.fixtures.js | 332 +++++++++++++++
src/components/ClassificationHarness.jsx | 388 +-----------------
src/components/ResultsList.jsx | 35 +-
src/components/TriageRow.jsx | 17 +-
src/hooks/useClassifier.js | 55 +--
src/hooks/useClassifier.test.js | 94 +++++
src/hooks/useIsMobile.js | 21 +
src/workers/nlp.worker.js | 29 ++
8 files changed, 538 insertions(+), 433 deletions(-)
create mode 100644 src/components/ClassificationHarness.fixtures.js
create mode 100644 src/hooks/useClassifier.test.js
create mode 100644 src/hooks/useIsMobile.js
diff --git a/src/components/ClassificationHarness.fixtures.js b/src/components/ClassificationHarness.fixtures.js
new file mode 100644
index 0000000..c33b3bc
--- /dev/null
+++ b/src/components/ClassificationHarness.fixtures.js
@@ -0,0 +1,332 @@
+// Fixture data for the dev-only Classification Harness (?test=classify).
+// Lives next to the component but split out because the trial array is
+// 300+ lines and made the harness file hard to navigate when iterating
+// on prompts vs data.
+//
+// `outOfScope: true` flags trials the CT.gov API would NOT return for
+// a breast-cancer search — kept in the fixture as wrong-condition
+// stress tests, but the harness's "production-realistic agreement"
+// toggle excludes them from the headline metric.
+
+export const SAMPLE_TRIALS = [
+ {
+ nctId: 'NCT05952557',
+ title: 'Phase IIIb Study of Ribociclib + Endocrine Therapy in Early Breast Cancer',
+ eligibility: 'Inclusion: Adult female, ≥18 years. HR-positive, HER2-negative early breast cancer. Completed definitive surgery. Postmenopausal status confirmed. ECOG 0-1. Adequate organ function. Exclusion: Prior CDK4/6 inhibitor. Pregnancy or breastfeeding. Active second malignancy.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT06104020',
+ title: 'Sacituzumab Govitecan in Metastatic Triple-Negative Breast Cancer',
+ eligibility: 'Inclusion: Adult, any sex. Histologically confirmed metastatic triple-negative breast cancer (ER<1%, PR<1%, HER2-negative). At least one prior line of systemic therapy in metastatic setting. ECOG 0-2. Measurable disease per RECIST 1.1. Exclusion: Active CNS metastases. Prior topoisomerase I inhibitor.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT05887492',
+ title: 'Adaptive Radiation Boost in Locally Advanced HER2+ Breast Cancer',
+ eligibility: 'Inclusion: Adult female. HER2-positive breast cancer confirmed by IHC 3+ or FISH-positive. Stage II-III disease. Completed neoadjuvant chemotherapy. ECOG 0-1. Exclusion: Prior radiation to chest. Pregnancy.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT06221340',
+ title: 'Aerobic Exercise During Adjuvant Chemo for Breast Cancer Survivors',
+ eligibility: 'Inclusion: Adult, any sex. Breast cancer, any stage. Currently receiving or scheduled for adjuvant chemotherapy. Cleared by oncologist for moderate exercise. Exclusion: Cardiac contraindications.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT04123456',
+ title: 'Pembrolizumab in Advanced Non-Small Cell Lung Cancer',
+ eligibility: 'Inclusion: Adult. Histologically confirmed advanced NSCLC. PD-L1 expression ≥50%. ECOG 0-1. Exclusion: Active autoimmune disease. Prior immunotherapy.',
+ expected: 'UNLIKELY',
+ outOfScope: true, // NSCLC — wouldn't appear in a breast-cancer API search
+ },
+ {
+ nctId: 'NCT05123987',
+ title: 'Targeted Therapy in Pediatric Acute Lymphoblastic Leukemia',
+ eligibility: 'Inclusion: Pediatric patients aged 2-17 years. Newly diagnosed ALL. Exclusion: Adults. Prior chemotherapy.',
+ expected: 'UNLIKELY',
+ outOfScope: true, // Pediatric ALL — wouldn't appear in a breast-cancer API search
+ },
+
+ // ─── Subtype-gated breast cancer trials — POSSIBLE without confirmed subtype ───
+ {
+ nctId: 'NCT05300100',
+ title: 'Tucatinib + Trastuzumab in HER2-Positive Metastatic Breast Cancer',
+ eligibility: 'Inclusion: Adult, any sex, ≥18 years. Histologically confirmed HER2-positive metastatic breast cancer (IHC 3+ or FISH-amplified). At least 2 prior HER2-directed therapies. ECOG 0-1. Exclusion: Untreated brain metastases. Prior tucatinib.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT05400201',
+ title: 'Olaparib Maintenance in BRCA-Mutated HER2-Negative Breast Cancer',
+ eligibility: 'Inclusion: Adult female. HER2-negative breast cancer with germline BRCA1 or BRCA2 mutation (confirmed by central testing). High-risk early disease following adjuvant chemotherapy. Postmenopausal or premenopausal with ovarian suppression. Exclusion: Prior PARP inhibitor.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT05511223',
+ title: 'CDK4/6 Inhibitor Switch in Hormone-Receptor-Positive Advanced Breast Cancer',
+ eligibility: 'Inclusion: Adult women, postmenopausal. HR-positive, HER2-negative advanced or metastatic breast cancer. Disease progression on a prior CDK4/6 inhibitor. ECOG 0-2.',
+ expected: 'POSSIBLE',
+ },
+
+ // ─── Strong matches for a 58yo with breast cancer ───
+ {
+ nctId: 'NCT05633445',
+ title: 'Cognitive Behavioral Therapy for Cancer-Related Fatigue',
+ eligibility: 'Inclusion: Adults ≥18 years with any solid tumor diagnosis (breast, colon, lung, prostate, etc.). Currently in active treatment or within 5 years of treatment completion. Self-reported fatigue ≥4 on a 0-10 scale. Exclusion: Severe untreated depression. Inability to attend weekly sessions.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT05755677',
+ title: 'Lymphedema Surveillance Program After Breast Cancer Surgery',
+ eligibility: 'Inclusion: Adult female ≥18 years. History of breast cancer treated with axillary surgery (sentinel lymph node biopsy or axillary dissection). Within 3 years of surgery. Exclusion: Pre-existing lymphedema. Current breast cancer recurrence.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT05822334',
+ title: 'Mindfulness-Based Stress Reduction for Breast Cancer Survivors',
+ eligibility: 'Inclusion: Adult women ≥21 years. Diagnosed with breast cancer (any stage). Completed primary treatment within the past 5 years OR currently on adjuvant endocrine therapy. Exclusion: Active psychosis. Prior MBSR participation.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT05901128',
+ title: 'Vaginal Estrogen Safety Study in Postmenopausal Breast Cancer Survivors',
+ eligibility: 'Inclusion: Postmenopausal women ages 45-75 with a history of HR-positive or HR-negative breast cancer. Disease-free for ≥1 year. Genitourinary symptoms of menopause. Stable on aromatase inhibitor or tamoxifen, or treatment-free. Exclusion: Current metastatic disease.',
+ expected: 'LIKELY',
+ },
+
+ // ─── Wrong condition / wrong demographic — clear UNLIKELY ───
+ {
+ nctId: 'NCT04567890',
+ title: 'Pembrolizumab in Advanced Melanoma',
+ eligibility: 'Inclusion: Adults with histologically confirmed unresectable Stage III or Stage IV melanoma. ECOG 0-1. No prior systemic therapy for advanced disease. Exclusion: Active autoimmune disease.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+ {
+ nctId: 'NCT04678901',
+ title: 'Apixaban vs. Warfarin in Atrial Fibrillation',
+ eligibility: 'Inclusion: Adults ≥18 years with non-valvular atrial fibrillation. CHA2DS2-VASc score ≥2. Exclusion: Mechanical heart valve. Active bleeding.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+ {
+ nctId: 'NCT04789012',
+ title: 'GLP-1 Agonist for Weight Management in Type 2 Diabetes',
+ eligibility: 'Inclusion: Adults 18-75 with Type 2 diabetes mellitus. BMI ≥30. HbA1c 7.0-10.0%. Exclusion: Type 1 diabetes. Active malignancy within 5 years. History of pancreatitis.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+ {
+ nctId: 'NCT04890123',
+ title: 'Robotic Prostatectomy Outcomes in Localized Prostate Cancer',
+ eligibility: 'Inclusion: Men ≥40 years with biopsy-confirmed clinically localized prostate cancer (T1-T2). Candidate for radical prostatectomy. Exclusion: Prior pelvic surgery or radiation.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+ {
+ nctId: 'NCT04901234',
+ title: 'Pediatric Vaccine Immunogenicity Study',
+ eligibility: 'Inclusion: Healthy children aged 6 months to 5 years. Up to date on routine immunizations. Exclusion: Immunocompromised. Recent illness within 14 days.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+
+ // ─── Edge cases — should challenge the model ───
+ {
+ nctId: 'NCT05012345',
+ title: 'Palliative Care Integration in Patients with Advanced Solid Tumors',
+ eligibility: 'Inclusion: Adults ≥18 years with advanced (Stage IV) solid tumor of any primary site (breast, lung, GI, GU, GYN). Estimated prognosis 6-24 months. ECOG 0-3. Exclusion: Currently enrolled in hospice.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT05123450',
+ title: 'Premenopausal Breast Cancer: Ovarian Function Suppression Trial',
+ eligibility: 'Inclusion: Premenopausal women ages 18-45 with newly diagnosed HR-positive early breast cancer. Confirmed premenopausal by FSH and estradiol levels. Exclusion: Postmenopausal status. Prior ovarian suppression therapy.',
+ expected: 'UNLIKELY',
+ },
+
+ // ─── Realistic-length eligibility (~2-3.5kB each) — stress-tests how the
+ // model handles formal CT.gov noise and how truncation affects accuracy.
+ // Try these with eligMax = 800 vs 3000 vs 6000 to see the trade-off.
+ {
+ nctId: 'NCT-LONG-01',
+ title: 'Phase II Study of Sacituzumab Govitecan-hziy in Patients with HR-Positive, HER2-Negative Metastatic Breast Cancer After Endocrine Therapy and CDK4/6 Inhibitor',
+ eligibility: `Inclusion Criteria:
+
+1. Female participants ≥18 years of age at the time of signing informed consent.
+2. Histologically or cytologically confirmed adenocarcinoma of the breast that is metastatic or locally advanced and not amenable to curative resection or radiotherapy.
+3. Documentation of estrogen receptor (ER)-positive (≥1% staining by IHC) and/or progesterone receptor (PR)-positive (≥1% staining by IHC) tumor status, in accordance with ASCO/CAP guidelines.
+4. Documentation of HER2-negative status defined as IHC 0, IHC 1+, or IHC 2+ with negative in situ hybridization (ISH), per ASCO/CAP guidelines.
+5. Disease progression on or after at least one prior CDK4/6 inhibitor (palbociclib, ribociclib, or abemaciclib) administered for advanced or metastatic disease, in combination with an aromatase inhibitor or fulvestrant.
+6. Disease progression on or after at least one and no more than two prior endocrine therapies (e.g., aromatase inhibitor, fulvestrant, tamoxifen) for advanced or metastatic disease.
+7. No more than one prior chemotherapy regimen for metastatic disease.
+8. Postmenopausal status, OR premenopausal/perimenopausal women who agree to receive concurrent ovarian function suppression with a luteinizing hormone-releasing hormone (LHRH) agonist throughout study treatment.
+9. Measurable disease per RECIST v1.1, or non-measurable bone-only disease assessable per protocol-specified criteria.
+10. ECOG performance status 0 or 1.
+11. Adequate organ function:
+ - Absolute neutrophil count (ANC) ≥1.5 × 10^9/L
+ - Platelets ≥100 × 10^9/L
+ - Hemoglobin ≥9.0 g/dL (transfusion permitted)
+ - Total bilirubin ≤1.5 × ULN (≤3 × ULN for participants with documented Gilbert syndrome)
+ - AST and ALT ≤2.5 × ULN (≤5 × ULN if liver metastases present)
+ - Creatinine clearance ≥50 mL/min by Cockcroft-Gault equation
+ - INR and aPTT ≤1.5 × ULN unless on anticoagulants
+12. Resolution of all acute toxic effects of prior anti-cancer therapy or surgical procedures to NCI CTCAE v5.0 Grade ≤1 (except alopecia and Grade 2 neuropathy).
+13. Willingness to provide tumor tissue (archival or fresh biopsy) for biomarker analyses.
+
+Exclusion Criteria:
+
+1. Prior treatment with sacituzumab govitecan or any other Trop-2-directed therapy.
+2. Prior treatment with an antibody-drug conjugate containing a topoisomerase I inhibitor payload (e.g., trastuzumab deruxtecan).
+3. Active CNS metastases. Participants with previously treated, asymptomatic CNS metastases are eligible if clinically stable for ≥4 weeks off corticosteroids and anticonvulsants.
+4. Leptomeningeal disease.
+5. Known active infection requiring systemic therapy, including untreated HIV, active HBV (HBsAg positive or HBV DNA detectable), or active HCV (HCV RNA detectable).
+6. Significant cardiovascular disease, including: NYHA Class III or IV congestive heart failure, myocardial infarction or unstable angina within 6 months, uncontrolled arrhythmia, baseline QTcF >470 ms.
+7. History of another malignancy within 3 years, except for adequately treated non-melanoma skin cancer, in situ cervical or breast cancer, or low-risk localized prostate cancer on active surveillance.
+8. Known hypersensitivity to irinotecan or any component of the study drug formulation.
+9. Pregnant or breastfeeding women. Women of childbearing potential must agree to use highly effective contraception during the study and for 6 months after the last dose.
+10. Concurrent participation in another therapeutic clinical trial.
+11. Major surgery within 4 weeks prior to first dose.
+12. Live vaccines within 30 days prior to first dose.`,
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT-LONG-02',
+ title: 'Randomized Phase III Trial of Adjuvant Endocrine Therapy ± Abemaciclib in Postmenopausal Women with HR-Positive, HER2-Negative, Node-Positive Early Breast Cancer at High Risk of Recurrence',
+ eligibility: `Inclusion Criteria:
+
+1. Female, postmenopausal at the time of randomization. Postmenopausal status defined as: (a) prior bilateral oophorectomy, (b) age ≥60 years, OR (c) age <60 with amenorrhea ≥12 months in the absence of chemotherapy, tamoxifen, or ovarian suppression AND FSH and estradiol in the postmenopausal range.
+2. Age 18 to 75 years inclusive at the time of consent.
+3. ECOG performance status of 0, 1, or 2.
+4. Histologically confirmed invasive breast carcinoma. Multicentric or multifocal disease is allowed if all foci meet eligibility.
+5. Hormone receptor-positive disease, defined as ≥1% of tumor cells staining positive for estrogen receptor and/or progesterone receptor by IHC, per ASCO/CAP guidelines.
+6. HER2-negative disease, defined as IHC 0, 1+, or 2+ with negative reflex ISH testing per ASCO/CAP guidelines.
+7. Stage II or III disease with high-risk pathologic features, defined as ≥1 of the following:
+ - ≥4 positive axillary lymph nodes, OR
+ - 1-3 positive axillary lymph nodes AND tumor size ≥5 cm, OR
+ - 1-3 positive axillary lymph nodes AND histologic grade 3, OR
+ - 1-3 positive axillary lymph nodes AND Ki-67 ≥20%
+8. Definitive surgical treatment of primary tumor with negative margins (lumpectomy with whole-breast irradiation OR mastectomy with or without post-mastectomy radiation per institutional standard).
+9. Completion of any neoadjuvant or adjuvant chemotherapy at least 21 days but no more than 16 months prior to randomization.
+10. Initiation of adjuvant endocrine therapy (aromatase inhibitor, with or without LHRH agonist) is permitted, but participants must not have received endocrine therapy for more than 12 weeks prior to randomization.
+11. Adequate organ function within 14 days of randomization:
+ - ANC ≥1.5 × 10^9/L
+ - Platelets ≥100 × 10^9/L
+ - Hemoglobin ≥10.0 g/dL
+ - Total bilirubin ≤1.5 × ULN
+ - AST/ALT ≤2.5 × ULN
+ - Creatinine clearance ≥50 mL/min
+12. Negative serum or urine pregnancy test for participants of childbearing potential.
+
+Exclusion Criteria:
+
+1. Stage IV (metastatic) breast cancer or evidence of distant metastases on staging imaging.
+2. Inflammatory breast cancer.
+3. Bilateral invasive breast cancer.
+4. Prior treatment with any CDK4/6 inhibitor in any setting.
+5. Prior anti-cancer therapy other than chemotherapy and locoregional therapy for the current breast cancer diagnosis.
+6. History of another malignancy within 5 years prior to randomization, except adequately treated non-melanoma skin cancer, in situ cervical cancer, or contralateral DCIS.
+7. Active or chronic hepatitis B or C infection, or known HIV infection.
+8. Significant uncontrolled cardiovascular disease: NYHA Class III/IV heart failure, myocardial infarction within 6 months, ventricular arrhythmia requiring treatment.
+9. History of interstitial lung disease or pneumonitis requiring corticosteroids.
+10. Major surgery (other than breast cancer surgery) within 28 days of randomization.
+11. Receiving strong CYP3A inhibitors or inducers within 14 days that cannot be discontinued.
+12. Inability to swallow oral medications or significant malabsorption.
+13. Pregnant or breastfeeding (premenopausal participants only — see inclusion criterion 1).`,
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT-LONG-03',
+ title: 'Phase III Study of Pembrolizumab Plus Chemotherapy versus Chemotherapy Alone for First-Line Treatment of Metastatic Squamous Non-Small Cell Lung Cancer',
+ outOfScope: true,
+ eligibility: `Inclusion Criteria:
+
+1. Histologically or cytologically confirmed Stage IV squamous non-small cell lung cancer (NSCLC) per AJCC 8th edition.
+2. Male or female ≥18 years of age.
+3. No prior systemic therapy for metastatic NSCLC. Prior adjuvant or neoadjuvant chemotherapy is allowed if completed ≥6 months prior to enrollment.
+4. Measurable disease per RECIST v1.1.
+5. Provision of a tumor tissue sample (archival or fresh biopsy) adequate for PD-L1 IHC testing using the 22C3 pharmDx assay.
+6. ECOG performance status 0 or 1.
+7. Life expectancy ≥3 months.
+8. Adequate organ function within 10 days of randomization:
+ - ANC ≥1.5 × 10^9/L without G-CSF support
+ - Platelets ≥100 × 10^9/L without transfusion
+ - Hemoglobin ≥9.0 g/dL
+ - Total bilirubin ≤1.5 × ULN
+ - AST/ALT ≤2.5 × ULN (≤5 × ULN if liver involvement)
+ - Creatinine clearance ≥45 mL/min
+ - INR/aPTT ≤1.5 × ULN
+9. Female participants of childbearing potential and male participants with partners of childbearing potential must agree to use effective contraception throughout treatment and for 120 days after last dose.
+
+Exclusion Criteria:
+
+1. Histology of mixed small cell and non-small cell lung cancer, or predominantly non-squamous histology.
+2. Known sensitizing EGFR mutation, ALK rearrangement, ROS1 rearrangement, BRAF V600E mutation, or other actionable alteration for which an approved targeted therapy is the standard of care.
+3. Prior treatment with any PD-1, PD-L1, PD-L2, or CTLA-4 inhibitor.
+4. Active autoimmune disease requiring systemic immunosuppression within 2 years. Replacement therapy (e.g., thyroxine, insulin, physiologic corticosteroids) is permitted.
+5. History of pneumonitis requiring corticosteroids, or active pneumonitis.
+6. Active CNS metastases or carcinomatous meningitis. Participants with previously treated, asymptomatic CNS metastases stable for ≥4 weeks may be eligible.
+7. Active infection requiring systemic therapy.
+8. Known active HIV, HBV, or HCV infection.
+9. Live vaccine within 30 days of first dose.
+10. History of solid organ or allogeneic stem cell transplant.
+11. Pregnant or breastfeeding women.
+12. History of another malignancy within 3 years, except for adequately treated non-melanoma skin cancer or in situ disease.`,
+ expected: 'UNLIKELY',
+ },
+ {
+ nctId: 'NCT-LONG-04',
+ title: 'Multicenter Randomized Trial of Empagliflozin in Patients with Heart Failure with Preserved Ejection Fraction and Type 2 Diabetes',
+ outOfScope: true,
+ eligibility: `Inclusion Criteria:
+
+1. Adults aged 40 to 85 years at consent.
+2. Documented diagnosis of heart failure with preserved ejection fraction (HFpEF):
+ - Left ventricular ejection fraction (LVEF) ≥50% on echocardiogram within the past 12 months
+ - NYHA functional class II, III, or IV
+ - Elevated NT-proBNP ≥300 pg/mL (or ≥600 pg/mL if atrial fibrillation present)
+ - Structural heart disease on echocardiography (LV hypertrophy or left atrial enlargement) OR documented prior HF hospitalization
+3. Documented Type 2 diabetes mellitus (T2DM) per ADA criteria, with HbA1c 6.5% to 10.0% at screening.
+4. Stable background heart failure therapy for ≥4 weeks (diuretic if indicated; ACEi/ARB/ARNI per guideline; beta-blocker per guideline).
+5. eGFR ≥25 mL/min/1.73m^2 by CKD-EPI equation.
+6. Body mass index 20 to 45 kg/m^2.
+7. Able and willing to provide written informed consent and adhere to study procedures.
+
+Exclusion Criteria:
+
+1. Type 1 diabetes mellitus.
+2. History of diabetic ketoacidosis within 12 months.
+3. LVEF <50% on most recent echocardiogram.
+4. Acute decompensated heart failure requiring IV diuretics within 4 weeks of screening.
+5. Acute coronary syndrome, stroke, or transient ischemic attack within 90 days.
+6. Planned cardiac surgery, percutaneous coronary intervention, or device implantation within 90 days.
+7. Symptomatic hypotension or systolic blood pressure <100 mmHg at screening.
+8. Significant valvular heart disease (severe aortic stenosis, severe mitral regurgitation requiring surgery).
+9. Hypertrophic cardiomyopathy, infiltrative cardiomyopathy, or constrictive pericarditis.
+10. eGFR <25 mL/min/1.73m^2 or end-stage renal disease requiring dialysis.
+11. Known active malignancy requiring treatment within the past 12 months. Participants with a history of cancer who are disease-free for >12 months are eligible.
+12. Severe hepatic impairment (Child-Pugh C).
+13. Pregnancy or breastfeeding.
+14. Known hypersensitivity to SGLT2 inhibitors.
+15. Participation in another interventional clinical trial within 30 days.
+16. Life expectancy <12 months due to non-cardiovascular cause.`,
+ expected: 'UNLIKELY',
+ },
+]
+
+// Patient-description presets for multilingual and edge-case validation: one
+// persona (58 years old, breast cancer, Boston) in several languages and
+// registers. Detail varies — only some entries state sex, and the "more
+// detail" ones add menopausal/post-chemo context. Entry 0 seeds the harness.
+export const USER_PRESETS = [
+  { id: 'en', label: 'English', text: "I'm 58 years old with breast cancer in Boston" },
+  { id: 'en-2', label: 'English (more detail)', text: "58-year-old woman in Boston, postmenopausal, recently diagnosed with breast cancer, looking for post-chemo treatment options" },
+  { id: 'es', label: 'Spanish (Español)', text: 'Tengo 58 años, vivo en Boston y tengo cáncer de mama' },
+  { id: 'es-2', label: 'Spanish (more detail)', text: 'Soy mujer de 58 años, posmenopáusica, vivo en Boston. Me diagnosticaron cáncer de mama y busco opciones de tratamiento después de quimioterapia.' },
+  { id: 'zh', label: 'Mandarin (中文)', text: '我58岁,住在波士顿,患有乳腺癌' },
+  { id: 'ar', label: 'Arabic (العربية)', text: 'أنا امرأة عمري 58 عامًا أعيش في بوسطن ومصابة بسرطان الثدي' },
+  { id: 'pt', label: 'Portuguese (Português)', text: 'Tenho 58 anos, moro em Boston e tenho câncer de mama' },
+  { id: 'fr', label: 'French (Français)', text: "J'ai 58 ans, je vis à Boston et j'ai un cancer du sein" },
+  { id: 'terse', label: 'Terse / fragments', text: '58F, BC, Boston' },
+]
+
diff --git a/src/components/ClassificationHarness.jsx b/src/components/ClassificationHarness.jsx
index 6b8cc36..0a5bdd5 100644
--- a/src/components/ClassificationHarness.jsx
+++ b/src/components/ClassificationHarness.jsx
@@ -2,384 +2,8 @@ import { useState, useEffect } from 'react'
import { useNLP } from '../hooks/useNLP'
import { useClassifier } from '../hooks/useClassifier'
import { NLP_MODELS, resolveModelKey } from '../utils/nlpModels'
-
-const SAMPLE_TRIALS = [
- {
- nctId: 'NCT05952557',
- title: 'Phase IIIb Study of Ribociclib + Endocrine Therapy in Early Breast Cancer',
- eligibility: 'Inclusion: Adult female, ≥18 years. HR-positive, HER2-negative early breast cancer. Completed definitive surgery. Postmenopausal status confirmed. ECOG 0-1. Adequate organ function. Exclusion: Prior CDK4/6 inhibitor. Pregnancy or breastfeeding. Active second malignancy.',
- expected: 'LIKELY',
- },
- {
- nctId: 'NCT06104020',
- title: 'Sacituzumab Govitecan in Metastatic Triple-Negative Breast Cancer',
- eligibility: 'Inclusion: Adult, any sex. Histologically confirmed metastatic triple-negative breast cancer (ER<1%, PR<1%, HER2-negative). At least one prior line of systemic therapy in metastatic setting. ECOG 0-2. Measurable disease per RECIST 1.1. Exclusion: Active CNS metastases. Prior topoisomerase I inhibitor.',
- expected: 'POSSIBLE',
- },
- {
- nctId: 'NCT05887492',
- title: 'Adaptive Radiation Boost in Locally Advanced HER2+ Breast Cancer',
- eligibility: 'Inclusion: Adult female. HER2-positive breast cancer confirmed by IHC 3+ or FISH-positive. Stage II-III disease. Completed neoadjuvant chemotherapy. ECOG 0-1. Exclusion: Prior radiation to chest. Pregnancy.',
- expected: 'POSSIBLE',
- },
- {
- nctId: 'NCT06221340',
- title: 'Aerobic Exercise During Adjuvant Chemo for Breast Cancer Survivors',
- eligibility: 'Inclusion: Adult, any sex. Breast cancer, any stage. Currently receiving or scheduled for adjuvant chemotherapy. Cleared by oncologist for moderate exercise. Exclusion: Cardiac contraindications.',
- expected: 'LIKELY',
- },
- {
- nctId: 'NCT04123456',
- title: 'Pembrolizumab in Advanced Non-Small Cell Lung Cancer',
- eligibility: 'Inclusion: Adult. Histologically confirmed advanced NSCLC. PD-L1 expression ≥50%. ECOG 0-1. Exclusion: Active autoimmune disease. Prior immunotherapy.',
- expected: 'UNLIKELY',
- outOfScope: true, // NSCLC — wouldn't appear in a breast-cancer API search
- },
- {
- nctId: 'NCT05123987',
- title: 'Targeted Therapy in Pediatric Acute Lymphoblastic Leukemia',
- eligibility: 'Inclusion: Pediatric patients aged 2-17 years. Newly diagnosed ALL. Exclusion: Adults. Prior chemotherapy.',
- expected: 'UNLIKELY',
- outOfScope: true, // Pediatric ALL — wouldn't appear in a breast-cancer API search
- },
-
- // ─── Subtype-gated breast cancer trials — POSSIBLE without confirmed subtype ───
- {
- nctId: 'NCT05300100',
- title: 'Tucatinib + Trastuzumab in HER2-Positive Metastatic Breast Cancer',
- eligibility: 'Inclusion: Adult, any sex, ≥18 years. Histologically confirmed HER2-positive metastatic breast cancer (IHC 3+ or FISH-amplified). At least 2 prior HER2-directed therapies. ECOG 0-1. Exclusion: Untreated brain metastases. Prior tucatinib.',
- expected: 'POSSIBLE',
- },
- {
- nctId: 'NCT05400201',
- title: 'Olaparib Maintenance in BRCA-Mutated HER2-Negative Breast Cancer',
- eligibility: 'Inclusion: Adult female. HER2-negative breast cancer with germline BRCA1 or BRCA2 mutation (confirmed by central testing). High-risk early disease following adjuvant chemotherapy. Postmenopausal or premenopausal with ovarian suppression. Exclusion: Prior PARP inhibitor.',
- expected: 'POSSIBLE',
- },
- {
- nctId: 'NCT05511223',
- title: 'CDK4/6 Inhibitor Switch in Hormone-Receptor-Positive Advanced Breast Cancer',
- eligibility: 'Inclusion: Adult women, postmenopausal. HR-positive, HER2-negative advanced or metastatic breast cancer. Disease progression on a prior CDK4/6 inhibitor. ECOG 0-2.',
- expected: 'POSSIBLE',
- },
-
- // ─── Strong matches for a 58yo with breast cancer ───
- {
- nctId: 'NCT05633445',
- title: 'Cognitive Behavioral Therapy for Cancer-Related Fatigue',
- eligibility: 'Inclusion: Adults ≥18 years with any solid tumor diagnosis (breast, colon, lung, prostate, etc.). Currently in active treatment or within 5 years of treatment completion. Self-reported fatigue ≥4 on a 0-10 scale. Exclusion: Severe untreated depression. Inability to attend weekly sessions.',
- expected: 'LIKELY',
- },
- {
- nctId: 'NCT05755677',
- title: 'Lymphedema Surveillance Program After Breast Cancer Surgery',
- eligibility: 'Inclusion: Adult female ≥18 years. History of breast cancer treated with axillary surgery (sentinel lymph node biopsy or axillary dissection). Within 3 years of surgery. Exclusion: Pre-existing lymphedema. Current breast cancer recurrence.',
- expected: 'LIKELY',
- },
- {
- nctId: 'NCT05822334',
- title: 'Mindfulness-Based Stress Reduction for Breast Cancer Survivors',
- eligibility: 'Inclusion: Adult women ≥21 years. Diagnosed with breast cancer (any stage). Completed primary treatment within the past 5 years OR currently on adjuvant endocrine therapy. Exclusion: Active psychosis. Prior MBSR participation.',
- expected: 'LIKELY',
- },
- {
- nctId: 'NCT05901128',
- title: 'Vaginal Estrogen Safety Study in Postmenopausal Breast Cancer Survivors',
- eligibility: 'Inclusion: Postmenopausal women ages 45-75 with a history of HR-positive or HR-negative breast cancer. Disease-free for ≥1 year. Genitourinary symptoms of menopause. Stable on aromatase inhibitor or tamoxifen, or treatment-free. Exclusion: Current metastatic disease.',
- expected: 'LIKELY',
- },
-
- // ─── Wrong condition / wrong demographic — clear UNLIKELY ───
- {
- nctId: 'NCT04567890',
- title: 'Pembrolizumab in Advanced Melanoma',
- eligibility: 'Inclusion: Adults with histologically confirmed unresectable Stage III or Stage IV melanoma. ECOG 0-1. No prior systemic therapy for advanced disease. Exclusion: Active autoimmune disease.',
- expected: 'UNLIKELY',
- outOfScope: true,
- },
- {
- nctId: 'NCT04678901',
- title: 'Apixaban vs. Warfarin in Atrial Fibrillation',
- eligibility: 'Inclusion: Adults ≥18 years with non-valvular atrial fibrillation. CHA2DS2-VASc score ≥2. Exclusion: Mechanical heart valve. Active bleeding.',
- expected: 'UNLIKELY',
- outOfScope: true,
- },
- {
- nctId: 'NCT04789012',
- title: 'GLP-1 Agonist for Weight Management in Type 2 Diabetes',
- eligibility: 'Inclusion: Adults 18-75 with Type 2 diabetes mellitus. BMI ≥30. HbA1c 7.0-10.0%. Exclusion: Type 1 diabetes. Active malignancy within 5 years. History of pancreatitis.',
- expected: 'UNLIKELY',
- outOfScope: true,
- },
- {
- nctId: 'NCT04890123',
- title: 'Robotic Prostatectomy Outcomes in Localized Prostate Cancer',
- eligibility: 'Inclusion: Men ≥40 years with biopsy-confirmed clinically localized prostate cancer (T1-T2). Candidate for radical prostatectomy. Exclusion: Prior pelvic surgery or radiation.',
- expected: 'UNLIKELY',
- outOfScope: true,
- },
- {
- nctId: 'NCT04901234',
- title: 'Pediatric Vaccine Immunogenicity Study',
- eligibility: 'Inclusion: Healthy children aged 6 months to 5 years. Up to date on routine immunizations. Exclusion: Immunocompromised. Recent illness within 14 days.',
- expected: 'UNLIKELY',
- outOfScope: true,
- },
-
- // ─── Edge cases — should challenge the model ───
- {
- nctId: 'NCT05012345',
- title: 'Palliative Care Integration in Patients with Advanced Solid Tumors',
- eligibility: 'Inclusion: Adults ≥18 years with advanced (Stage IV) solid tumor of any primary site (breast, lung, GI, GU, GYN). Estimated prognosis 6-24 months. ECOG 0-3. Exclusion: Currently enrolled in hospice.',
- expected: 'POSSIBLE',
- },
- {
- nctId: 'NCT05123450',
- title: 'Premenopausal Breast Cancer: Ovarian Function Suppression Trial',
- eligibility: 'Inclusion: Premenopausal women ages 18-45 with newly diagnosed HR-positive early breast cancer. Confirmed premenopausal by FSH and estradiol levels. Exclusion: Postmenopausal status. Prior ovarian suppression therapy.',
- expected: 'UNLIKELY',
- },
-
- // ─── Realistic-length eligibility (~2-3.5kB each) — stress-tests how the
- // model handles formal CT.gov noise and how truncation affects accuracy.
- // Try these with eligMax = 800 vs 3000 vs 6000 to see the trade-off.
- {
- nctId: 'NCT-LONG-01',
- title: 'Phase II Study of Sacituzumab Govitecan-hziy in Patients with HR-Positive, HER2-Negative Metastatic Breast Cancer After Endocrine Therapy and CDK4/6 Inhibitor',
- eligibility: `Inclusion Criteria:
-
-1. Female participants ≥18 years of age at the time of signing informed consent.
-2. Histologically or cytologically confirmed adenocarcinoma of the breast that is metastatic or locally advanced and not amenable to curative resection or radiotherapy.
-3. Documentation of estrogen receptor (ER)-positive (≥1% staining by IHC) and/or progesterone receptor (PR)-positive (≥1% staining by IHC) tumor status, in accordance with ASCO/CAP guidelines.
-4. Documentation of HER2-negative status defined as IHC 0, IHC 1+, or IHC 2+ with negative in situ hybridization (ISH), per ASCO/CAP guidelines.
-5. Disease progression on or after at least one prior CDK4/6 inhibitor (palbociclib, ribociclib, or abemaciclib) administered for advanced or metastatic disease, in combination with an aromatase inhibitor or fulvestrant.
-6. Disease progression on or after at least one and no more than two prior endocrine therapies (e.g., aromatase inhibitor, fulvestrant, tamoxifen) for advanced or metastatic disease.
-7. No more than one prior chemotherapy regimen for metastatic disease.
-8. Postmenopausal status, OR premenopausal/perimenopausal women who agree to receive concurrent ovarian function suppression with a luteinizing hormone-releasing hormone (LHRH) agonist throughout study treatment.
-9. Measurable disease per RECIST v1.1, or non-measurable bone-only disease assessable per protocol-specified criteria.
-10. ECOG performance status 0 or 1.
-11. Adequate organ function:
- - Absolute neutrophil count (ANC) ≥1.5 × 10^9/L
- - Platelets ≥100 × 10^9/L
- - Hemoglobin ≥9.0 g/dL (transfusion permitted)
- - Total bilirubin ≤1.5 × ULN (≤3 × ULN for participants with documented Gilbert syndrome)
- - AST and ALT ≤2.5 × ULN (≤5 × ULN if liver metastases present)
- - Creatinine clearance ≥50 mL/min by Cockcroft-Gault equation
- - INR and aPTT ≤1.5 × ULN unless on anticoagulants
-12. Resolution of all acute toxic effects of prior anti-cancer therapy or surgical procedures to NCI CTCAE v5.0 Grade ≤1 (except alopecia and Grade 2 neuropathy).
-13. Willingness to provide tumor tissue (archival or fresh biopsy) for biomarker analyses.
-
-Exclusion Criteria:
-
-1. Prior treatment with sacituzumab govitecan or any other Trop-2-directed therapy.
-2. Prior treatment with an antibody-drug conjugate containing a topoisomerase I inhibitor payload (e.g., trastuzumab deruxtecan).
-3. Active CNS metastases. Participants with previously treated, asymptomatic CNS metastases are eligible if clinically stable for ≥4 weeks off corticosteroids and anticonvulsants.
-4. Leptomeningeal disease.
-5. Known active infection requiring systemic therapy, including untreated HIV, active HBV (HBsAg positive or HBV DNA detectable), or active HCV (HCV RNA detectable).
-6. Significant cardiovascular disease, including: NYHA Class III or IV congestive heart failure, myocardial infarction or unstable angina within 6 months, uncontrolled arrhythmia, baseline QTcF >470 ms.
-7. History of another malignancy within 3 years, except for adequately treated non-melanoma skin cancer, in situ cervical or breast cancer, or low-risk localized prostate cancer on active surveillance.
-8. Known hypersensitivity to irinotecan or any component of the study drug formulation.
-9. Pregnant or breastfeeding women. Women of childbearing potential must agree to use highly effective contraception during the study and for 6 months after the last dose.
-10. Concurrent participation in another therapeutic clinical trial.
-11. Major surgery within 4 weeks prior to first dose.
-12. Live vaccines within 30 days prior to first dose.`,
- expected: 'POSSIBLE',
- },
- {
- nctId: 'NCT-LONG-02',
- title: 'Randomized Phase III Trial of Adjuvant Endocrine Therapy ± Abemaciclib in Postmenopausal Women with HR-Positive, HER2-Negative, Node-Positive Early Breast Cancer at High Risk of Recurrence',
- eligibility: `Inclusion Criteria:
-
-1. Female, postmenopausal at the time of randomization. Postmenopausal status defined as: (a) prior bilateral oophorectomy, (b) age ≥60 years, OR (c) age <60 with amenorrhea ≥12 months in the absence of chemotherapy, tamoxifen, or ovarian suppression AND FSH and estradiol in the postmenopausal range.
-2. Age 18 to 75 years inclusive at the time of consent.
-3. ECOG performance status of 0, 1, or 2.
-4. Histologically confirmed invasive breast carcinoma. Multicentric or multifocal disease is allowed if all foci meet eligibility.
-5. Hormone receptor-positive disease, defined as ≥1% of tumor cells staining positive for estrogen receptor and/or progesterone receptor by IHC, per ASCO/CAP guidelines.
-6. HER2-negative disease, defined as IHC 0, 1+, or 2+ with negative reflex ISH testing per ASCO/CAP guidelines.
-7. Stage II or III disease with high-risk pathologic features, defined as ≥1 of the following:
- - ≥4 positive axillary lymph nodes, OR
- - 1-3 positive axillary lymph nodes AND tumor size ≥5 cm, OR
- - 1-3 positive axillary lymph nodes AND histologic grade 3, OR
- - 1-3 positive axillary lymph nodes AND Ki-67 ≥20%
-8. Definitive surgical treatment of primary tumor with negative margins (lumpectomy with whole-breast irradiation OR mastectomy with or without post-mastectomy radiation per institutional standard).
-9. Completion of any neoadjuvant or adjuvant chemotherapy at least 21 days but no more than 16 months prior to randomization.
-10. Initiation of adjuvant endocrine therapy (aromatase inhibitor, with or without LHRH agonist) is permitted, but participants must not have received endocrine therapy for more than 12 weeks prior to randomization.
-11. Adequate organ function within 14 days of randomization:
- - ANC ≥1.5 × 10^9/L
- - Platelets ≥100 × 10^9/L
- - Hemoglobin ≥10.0 g/dL
- - Total bilirubin ≤1.5 × ULN
- - AST/ALT ≤2.5 × ULN
- - Creatinine clearance ≥50 mL/min
-12. Negative serum or urine pregnancy test for participants of childbearing potential.
-
-Exclusion Criteria:
-
-1. Stage IV (metastatic) breast cancer or evidence of distant metastases on staging imaging.
-2. Inflammatory breast cancer.
-3. Bilateral invasive breast cancer.
-4. Prior treatment with any CDK4/6 inhibitor in any setting.
-5. Prior anti-cancer therapy other than chemotherapy and locoregional therapy for the current breast cancer diagnosis.
-6. History of another malignancy within 5 years prior to randomization, except adequately treated non-melanoma skin cancer, in situ cervical cancer, or contralateral DCIS.
-7. Active or chronic hepatitis B or C infection, or known HIV infection.
-8. Significant uncontrolled cardiovascular disease: NYHA Class III/IV heart failure, myocardial infarction within 6 months, ventricular arrhythmia requiring treatment.
-9. History of interstitial lung disease or pneumonitis requiring corticosteroids.
-10. Major surgery (other than breast cancer surgery) within 28 days of randomization.
-11. Receiving strong CYP3A inhibitors or inducers within 14 days that cannot be discontinued.
-12. Inability to swallow oral medications or significant malabsorption.
-13. Pregnant or breastfeeding (premenopausal participants only — see inclusion criterion 1).`,
- expected: 'LIKELY',
- },
- {
- nctId: 'NCT-LONG-03',
- title: 'Phase III Study of Pembrolizumab Plus Chemotherapy versus Chemotherapy Alone for First-Line Treatment of Metastatic Squamous Non-Small Cell Lung Cancer',
- outOfScope: true,
- eligibility: `Inclusion Criteria:
-
-1. Histologically or cytologically confirmed Stage IV squamous non-small cell lung cancer (NSCLC) per AJCC 8th edition.
-2. Male or female ≥18 years of age.
-3. No prior systemic therapy for metastatic NSCLC. Prior adjuvant or neoadjuvant chemotherapy is allowed if completed ≥6 months prior to enrollment.
-4. Measurable disease per RECIST v1.1.
-5. Provision of a tumor tissue sample (archival or fresh biopsy) adequate for PD-L1 IHC testing using the 22C3 pharmDx assay.
-6. ECOG performance status 0 or 1.
-7. Life expectancy ≥3 months.
-8. Adequate organ function within 10 days of randomization:
- - ANC ≥1.5 × 10^9/L without G-CSF support
- - Platelets ≥100 × 10^9/L without transfusion
- - Hemoglobin ≥9.0 g/dL
- - Total bilirubin ≤1.5 × ULN
- - AST/ALT ≤2.5 × ULN (≤5 × ULN if liver involvement)
- - Creatinine clearance ≥45 mL/min
- - INR/aPTT ≤1.5 × ULN
-9. Female participants of childbearing potential and male participants with partners of childbearing potential must agree to use effective contraception throughout treatment and for 120 days after last dose.
-
-Exclusion Criteria:
-
-1. Histology of mixed small cell and non-small cell lung cancer, or predominantly non-squamous histology.
-2. Known sensitizing EGFR mutation, ALK rearrangement, ROS1 rearrangement, BRAF V600E mutation, or other actionable alteration for which an approved targeted therapy is the standard of care.
-3. Prior treatment with any PD-1, PD-L1, PD-L2, or CTLA-4 inhibitor.
-4. Active autoimmune disease requiring systemic immunosuppression within 2 years. Replacement therapy (e.g., thyroxine, insulin, physiologic corticosteroids) is permitted.
-5. History of pneumonitis requiring corticosteroids, or active pneumonitis.
-6. Active CNS metastases or carcinomatous meningitis. Participants with previously treated, asymptomatic CNS metastases stable for ≥4 weeks may be eligible.
-7. Active infection requiring systemic therapy.
-8. Known active HIV, HBV, or HCV infection.
-9. Live vaccine within 30 days of first dose.
-10. History of solid organ or allogeneic stem cell transplant.
-11. Pregnant or breastfeeding women.
-12. History of another malignancy within 3 years, except for adequately treated non-melanoma skin cancer or in situ disease.`,
- expected: 'UNLIKELY',
- },
- {
- nctId: 'NCT-LONG-04',
- title: 'Multicenter Randomized Trial of Empagliflozin in Patients with Heart Failure with Preserved Ejection Fraction and Type 2 Diabetes',
- outOfScope: true,
- eligibility: `Inclusion Criteria:
-
-1. Adults aged 40 to 85 years at consent.
-2. Documented diagnosis of heart failure with preserved ejection fraction (HFpEF):
- - Left ventricular ejection fraction (LVEF) ≥50% on echocardiogram within the past 12 months
- - NYHA functional class II, III, or IV
- - Elevated NT-proBNP ≥300 pg/mL (or ≥600 pg/mL if atrial fibrillation present)
- - Structural heart disease on echocardiography (LV hypertrophy or left atrial enlargement) OR documented prior HF hospitalization
-3. Documented Type 2 diabetes mellitus (T2DM) per ADA criteria, with HbA1c 6.5% to 10.0% at screening.
-4. Stable background heart failure therapy for ≥4 weeks (diuretic if indicated; ACEi/ARB/ARNI per guideline; beta-blocker per guideline).
-5. eGFR ≥25 mL/min/1.73m^2 by CKD-EPI equation.
-6. Body mass index 20 to 45 kg/m^2.
-7. Able and willing to provide written informed consent and adhere to study procedures.
-
-Exclusion Criteria:
-
-1. Type 1 diabetes mellitus.
-2. History of diabetic ketoacidosis within 12 months.
-3. LVEF <50% on most recent echocardiogram.
-4. Acute decompensated heart failure requiring IV diuretics within 4 weeks of screening.
-5. Acute coronary syndrome, stroke, or transient ischemic attack within 90 days.
-6. Planned cardiac surgery, percutaneous coronary intervention, or device implantation within 90 days.
-7. Symptomatic hypotension or systolic blood pressure <100 mmHg at screening.
-8. Significant valvular heart disease (severe aortic stenosis, severe mitral regurgitation requiring surgery).
-9. Hypertrophic cardiomyopathy, infiltrative cardiomyopathy, or constrictive pericarditis.
-10. eGFR <25 mL/min/1.73m^2 or end-stage renal disease requiring dialysis.
-11. Known active malignancy requiring treatment within the past 12 months. Participants with a history of cancer who are disease-free for >12 months are eligible.
-12. Severe hepatic impairment (Child-Pugh C).
-13. Pregnancy or breastfeeding.
-14. Known hypersensitivity to SGLT2 inhibitors.
-15. Participation in another interventional clinical trial within 30 days.
-16. Life expectancy <12 months due to non-cardiovascular cause.`,
- expected: 'UNLIKELY',
- },
-]
-
-const DEFAULT_PROMPT = `You decide whether a clinical trial is worth showing to a patient. Output one of two labels:
-
-- LIKELY: the trial studies the patient's condition AND nothing in the eligibility clearly excludes the patient based on what they stated. Worth showing.
-- UNLIKELY: the trial studies a different disease, OR the patient is clearly the wrong sex / age / population. Not worth showing.
-
-Be inclusive on LIKELY: if the trial requires a subtype, biomarker, stage, or prior treatment the patient did NOT mention, still call it LIKELY — the patient or their doctor can verify. Only use UNLIKELY when the patient is clearly disqualified by something they DID state.
-
-Examples (note: each example uses a DIFFERENT patient — focus on the reasoning, not the patient details):
-
-Patient: "45-year-old woman with ovarian cancer"
-Trial: PARP Inhibitor in BRCA-Mutated Ovarian Cancer (Eligibility: women with ovarian cancer and BRCA mutation)
-Answer: LIKELY | matches ovarian cancer in a woman; BRCA status can be verified
-
-Patient: "70-year-old man with type 2 diabetes"
-Trial: Tamoxifen in Premenopausal Breast Cancer (Eligibility: premenopausal women with breast cancer)
-Answer: UNLIKELY | trial is for breast cancer in women; patient has diabetes
-
-Patient: "8-year-old child with asthma"
-Trial: Adult Anti-Inflammatory for Asthma (Eligibility: adults 18+ with persistent asthma)
-Answer: UNLIKELY | trial is for adults; patient is a child
-
-Patient: "55-year-old man with hypertension"
-Trial: Yoga Intervention for Adults with Chronic Conditions (Eligibility: adults 40-75 with any chronic condition)
-Answer: LIKELY | adult with chronic condition matches the broad inclusion
-
-Now classify:
-
-Patient: {{user}}
-Trial: {{title}}
-Eligibility: {{eligibility}}
-
-Answer (one line, format exactly "<LABEL> | <reason>"):`
-
-const DEFAULT_USER_DESC = "I'm 58 years old with breast cancer in Boston"
-
-// Patient description presets for multilingual + edge-case validation. Same
-// 58yo woman with breast cancer in Boston, expressed in different languages
-// and registers (formal, terse, etc.) so we can stress-test the model's
-// understanding without changing the underlying clinical signal.
-const USER_PRESETS = [
- { id: 'en', label: 'English', text: "I'm 58 years old with breast cancer in Boston" },
- { id: 'en-2', label: 'English (more detail)', text: "58-year-old woman in Boston, postmenopausal, recently diagnosed with breast cancer, looking for post-chemo treatment options" },
- { id: 'es', label: 'Spanish (Español)', text: 'Tengo 58 años, vivo en Boston y tengo cáncer de mama' },
- { id: 'es-2', label: 'Spanish (more detail)', text: 'Soy mujer de 58 años, posmenopáusica, vivo en Boston. Me diagnosticaron cáncer de mama y busco opciones de tratamiento después de quimioterapia.' },
- { id: 'zh', label: 'Mandarin (中文)', text: '我58岁,住在波士顿,患有乳腺癌' },
- { id: 'ar', label: 'Arabic (العربية)', text: 'أنا امرأة عمري 58 عامًا أعيش في بوسطن ومصابة بسرطان الثدي' },
- { id: 'pt', label: 'Portuguese (Português)', text: 'Tenho 58 anos, moro em Boston e tenho câncer de mama' },
- { id: 'fr', label: 'French (Français)', text: "J'ai 58 ans, je vis à Boston et j'ai un cancer du sein" },
- { id: 'terse', label: 'Terse / fragments', text: '58F, BC, Boston' },
-]
-
-// Parser still accepts POSSIBLE in case the model emits it (older prompts,
-// instruction drift) — POSSIBLE is normalized to LIKELY since the binary
-// product question is "show or hide".
-function parseVerdict(raw) {
- if (!raw || typeof raw !== 'string') return { verdict: 'PARSE_FAIL', reason: '(empty output)' }
- const m = raw.match(/^\s*(LIKELY|POSSIBLE|UNLIKELY)\s*[|:\-—]\s*(.+?)\s*$/im)
- if (m) {
- const v = m[1].toUpperCase()
- return { verdict: v === 'POSSIBLE' ? 'LIKELY' : v, reason: m[2].trim() }
- }
- const w = raw.match(/\b(LIKELY|POSSIBLE|UNLIKELY)\b/i)
- if (w) {
- const v = w[1].toUpperCase()
- return {
- verdict: v === 'POSSIBLE' ? 'LIKELY' : v,
- reason: raw.replace(w[0], '').replace(/^[\s|:\-—]+/, '').trim() || '(no reason)',
- }
- }
- return { verdict: 'PARSE_FAIL', reason: raw.slice(0, 120) }
-}
+import { DEFAULT_CLASSIFY_PROMPT, parseVerdict } from '../utils/classifyTrial'
+import { SAMPLE_TRIALS, USER_PRESETS } from './ClassificationHarness.fixtures'
// Normalize fixture-side expected values for binary agreement: POSSIBLE
// counts as LIKELY (both = "show this trial"). Keeps the fixture data
@@ -404,10 +28,10 @@ export default function ClassificationHarness() {
)
const model = NLP_MODELS[modelKey]
const { status, progress, error, load, webGPUSupported } = useNLP()
- const { classifyOne } = useClassifier()
+ const { classifyOne, translateOne } = useClassifier()
- const [userDesc, setUserDesc] = useState(DEFAULT_USER_DESC)
- const [promptTemplate, setPromptTemplate] = useState(DEFAULT_PROMPT)
+ const [userDesc, setUserDesc] = useState(USER_PRESETS[0].text)
+ const [promptTemplate, setPromptTemplate] = useState(DEFAULT_CLASSIFY_PROMPT)
const [trialsJson, setTrialsJson] = useState(JSON.stringify(SAMPLE_TRIALS, null, 2))
const [concurrency, setConcurrency] = useState(3)
const [eligMax, setEligMax] = useState(1500)
@@ -458,7 +82,7 @@ Patient description: ${userDesc}
English translation:`
try {
- const { raw } = await classifyOne(translatePrompt)
+ const { raw } = await translateOne(translatePrompt)
effectiveUserDesc = (raw || '').trim().replace(/^["']|["']$/g, '')
setTranslatedDesc(effectiveUserDesc)
} catch (e) {
diff --git a/src/components/ResultsList.jsx b/src/components/ResultsList.jsx
index ad99d6c..c12a4d9 100644
--- a/src/components/ResultsList.jsx
+++ b/src/components/ResultsList.jsx
@@ -4,6 +4,7 @@ import { useClinicalTrials } from '../hooks/useClinicalTrials'
import { useSimplifier } from '../hooks/useSimplifier'
import { useNLP } from '../hooks/useNLP'
import { useClassifier } from '../hooks/useClassifier'
+import { useIsMobile } from '../hooks/useIsMobile'
import { NLP_MODELS } from '../utils/nlpModels'
import { buildClassifyPrompt, parseVerdict } from '../utils/classifyTrial'
import ResultCard from './ResultCard'
@@ -17,6 +18,14 @@ import {
const NLP_CONSENT_KEY = 'iris_nlp_enabled'
+// Stage-1 classification is wired end-to-end (worker, hook, harness) but
+// not yet surfaced in the in-app results UI. Reason: without sort wiring
+// the fit dots don't drive any user-visible behavior — they're just
+// decoration. The harness at ?test=classify still uses the full pipeline
+// for prompt iteration and validation. Flip this to true once "Best fit"
+// sort is wired so the dots become actionable.
+const ENABLE_CLASSIFY_IN_RESULTS = false
+
// Build a synthetic patient description from extracted fields when the user
// came in via structured form but had previously used NL (so consent exists).
function patientDescFromFields(fields) {
@@ -30,27 +39,8 @@ function patientDescFromFields(fields) {
}
const EAGER_BATCH_SIZE = 5
-const MOBILE_BREAKPOINT_PX = 820
const LIST_WIDTH_PX = 400
-// matchMedia (not 'resize'): iOS Safari fires 'resize' inconsistently on
-// rotation; matchMedia.change is the reliable signal. Also catches iPad
-// split-screen and browser-window mode switches without a manual resize.
-function useIsMobile() {
- const query = `(max-width: ${MOBILE_BREAKPOINT_PX}px)`
- const [isMobile, setIsMobile] = useState(() =>
- typeof window !== 'undefined' && window.matchMedia(query).matches
- )
- useEffect(() => {
- const mq = window.matchMedia(query)
- const onChange = (e) => setIsMobile(e.matches)
- mq.addEventListener('change', onChange)
- return () => mq.removeEventListener('change', onChange)
- // eslint-disable-next-line react-hooks/exhaustive-deps
- }, [])
- return isMobile
-}
-
export default function ResultsList({ searchParams, modelKey, userDescription, extractedFields }) {
// Phase 3 simplification only ships for English and Spanish — those are
// the languages we've verified the local model produces accurately.
@@ -105,8 +95,11 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
const consented = useMemo(() => {
try { return localStorage.getItem(NLP_CONSENT_KEY) === 'true' } catch { return false }
}, [])
- const patientDesc = userDescription || patientDescFromFields(extractedFields)
- const canClassify = consented && nlp.webGPUSupported && Boolean(patientDesc)
+ const patientDesc = useMemo(
+ () => userDescription || patientDescFromFields(extractedFields),
+ [userDescription, extractedFields]
+ )
+ const canClassify = ENABLE_CLASSIFY_IN_RESULTS && consented && nlp.webGPUSupported && Boolean(patientDesc)
// Idempotent: worker fast-returns 'ready' if engine already loaded
// (e.g. NL extraction loaded it earlier this session). Destructure
diff --git a/src/components/TriageRow.jsx b/src/components/TriageRow.jsx
index 2246c11..e7d183d 100644
--- a/src/components/TriageRow.jsx
+++ b/src/components/TriageRow.jsx
@@ -15,18 +15,23 @@ function FitDot({ classification, pending }) {
if (!classification) return null
const isLikely = classification.verdict === 'LIKELY'
+ // Fold the model's reason into aria-label so SR/keyboard users get the
+ // same context as a sighted hover. title alone wasn't reaching either
+ // group reliably (title isn't announced by most screen readers, isn't
+ // keyboard-discoverable). Same string in both attrs means verdict +
+ // reason are the unit a user perceives, not just the verdict.
+ const label = isLikely
+ ? `Likely fit — ${classification.reason || 'matches your description'}`
+ : `Less likely fit — ${classification.reason || 'may not match'}`
return (
)
}
diff --git a/src/hooks/useClassifier.js b/src/hooks/useClassifier.js
index 2004aac..e899970 100644
--- a/src/hooks/useClassifier.js
+++ b/src/hooks/useClassifier.js
@@ -1,18 +1,19 @@
import { useRef, useEffect, useCallback } from 'react'
import { getSharedWorker, attachListener } from '../workers/sharedNlpWorker'
-// Stage-1 classifier hook. Posts a 'classify' task to the shared NLP worker
-// and resolves with { raw, latencyMs }. The caller parses the verdict.
+// Two task functions (classifyOne, translateOne) returned by this hook
+// share a single promise chain because WebLLM's MLCEngine is NOT
+// parallel-safe. Concurrent engine.chat.completions.create() calls clobber
+// state and produce "Message error should not be 0" failures. Callers can
+// fire-and-forget concurrently; each request waits its turn in the chain.
//
-// IMPORTANT: WebLLM's MLCEngine is NOT parallel-safe. Concurrent
-// engine.chat.completions.create() calls clobber each other's state and
-// produce "Message error should not be 0" failures. We serialize all
-// classify requests through a single promise chain at the hook level —
-// callers can fire-and-forget concurrently, but each request waits its
-// turn. Caller-side concurrency knobs become a no-op for actual
-// parallelism, but still control queue capacity.
+// The two task types are functionally similar (one-shot completion with
+// raw + latencyMs return) but conceptually distinct, so they get distinct
+// worker message types ('classify' vs 'translate') for clarity and so the
+// worker can use different max_tokens budgets.
//
-// The worker must already have the model loaded.
+// The worker must already have the model loaded. classifyOne/translateOne
+// reject with 'Engine not loaded' otherwise.
export function useClassifier() {
const pendingRef = useRef(new Map())
const detachRef = useRef(null)
@@ -26,12 +27,14 @@ export function useClassifier() {
function handleMessage(event) {
const { type, taskId, raw, latencyMs, message } = event.data ?? {}
- if (type !== 'classify_done' && type !== 'classify_error') return
+ const isDone = type === 'classify_done' || type === 'translate_done'
+ const isError = type === 'classify_error' || type === 'translate_error'
+ if (!isDone && !isError) return
const pending = pendingRef.current.get(taskId)
if (!pending) return
pendingRef.current.delete(taskId)
- if (type === 'classify_done') pending.resolve({ raw, latencyMs })
- else pending.reject(new Error(message ?? 'classify failed'))
+ if (isDone) pending.resolve({ raw, latencyMs })
+ else pending.reject(new Error(message ?? 'task failed'))
}
useEffect(() => {
@@ -39,11 +42,9 @@ export function useClassifier() {
return () => {
detachRef.current?.()
detachRef.current = null
- // Reject every in-flight classify so awaiting callers don't hang
+ // Reject every in-flight task so awaiting callers don't hang
// forever when the component unmounts mid-batch (or during a
- // StrictMode dev double-invoke). Without this, the listener
- // detaches but the pendingRef Map still holds resolve/reject
- // handles whose promise will never settle.
+ // StrictMode dev double-invoke).
for (const { reject } of pending.values()) {
reject(new Error('classifier unmounted'))
}
@@ -51,20 +52,26 @@ export function useClassifier() {
}
}, [])
- const classifyOne = useCallback((prompt) => {
+ // Generic task runner — same chain semantics, different worker message
+ // type. handleMessage matches done/error messages to their pending entry
+ // by the full taskId (the Map key); the shared counter keeps ids distinct
+ // within this hook instance, so taskIdPrefix is only a label that makes
+ // worker logs self-documenting.
+ function runTask(workerType, taskIdPrefix, prompt) {
ensureSubscribed()
- const taskId = `classify-${++taskIdRef.current}`
- // Chain onto the previous request so only one inference runs at a time.
- // .catch in the chain prevents one failure from breaking the whole queue.
+ const taskId = `${taskIdPrefix}-${++taskIdRef.current}`
const next = chainRef.current.catch(() => {}).then(() =>
new Promise((resolve, reject) => {
pendingRef.current.set(taskId, { resolve, reject })
- getSharedWorker().postMessage({ type: 'classify', taskId, prompt })
+ getSharedWorker().postMessage({ type: workerType, taskId, prompt })
})
)
chainRef.current = next
return next
- }, [])
+ }
+
+ const classifyOne = useCallback((prompt) => runTask('classify', 'classify', prompt), [])
+ const translateOne = useCallback((prompt) => runTask('translate', 'translate', prompt), [])
- return { classifyOne }
+ return { classifyOne, translateOne }
}
diff --git a/src/hooks/useClassifier.test.js b/src/hooks/useClassifier.test.js
new file mode 100644
index 0000000..61f0c85
--- /dev/null
+++ b/src/hooks/useClassifier.test.js
@@ -0,0 +1,94 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+import { renderHook, act, waitFor } from '@testing-library/react'
+import { useClassifier } from './useClassifier'
+
+// Mock the shared worker so tests don't touch the real WebLLM worker.
+// We intercept postMessage to capture call order, and we expose a way for
+// the test to invoke the listener with synthetic 'classify_done' messages.
+let capturedListener = null
+let postedMessages = []
+
+vi.mock('../workers/sharedNlpWorker', () => ({
+ getSharedWorker: () => ({
+ postMessage: (msg) => { postedMessages.push(msg) },
+ }),
+ attachListener: (fn) => {
+ capturedListener = fn
+ return () => { capturedListener = null }
+ },
+}))
+
+beforeEach(() => {
+ capturedListener = null
+ postedMessages = []
+})
+
+// Helper: construct a 'classify_done' worker message and pass it to whatever
+// useClassifier registered as its listener.
+function dispatchDone(taskId, raw = 'LIKELY | mock', latencyMs = 100) {
+ capturedListener({ data: { type: 'classify_done', taskId, raw, latencyMs } })
+}
+
+describe('useClassifier — promise chain serialization', () => {
+ it('posts only the first request to the worker until it settles', async () => {
+ const { result } = renderHook(() => useClassifier())
+
+ // Fire 3 concurrent classifyOne calls.
+ let p1, p2, p3
+ p1 = result.current.classifyOne('prompt-1')
+ p2 = result.current.classifyOne('prompt-2')
+ p3 = result.current.classifyOne('prompt-3')
+
+ // Only the first task should be in flight.
+ await waitFor(() => expect(postedMessages.length).toBe(1))
+ expect(postedMessages[0].prompt).toBe('prompt-1')
+
+ // Settle task 1; task 2 should now post.
+ dispatchDone(postedMessages[0].taskId, 'LIKELY | one')
+ await p1
+ await waitFor(() => expect(postedMessages.length).toBe(2))
+ expect(postedMessages[1].prompt).toBe('prompt-2')
+
+ // Settle task 2; task 3 posts.
+ dispatchDone(postedMessages[1].taskId, 'UNLIKELY | two')
+ await p2
+ await waitFor(() => expect(postedMessages.length).toBe(3))
+ expect(postedMessages[2].prompt).toBe('prompt-3')
+
+ // Settle task 3.
+ dispatchDone(postedMessages[2].taskId, 'LIKELY | three')
+ const r3 = await p3
+ expect(r3.raw).toBe('LIKELY | three')
+ })
+
+ it('does not poison the queue when one task rejects', async () => {
+ const { result } = renderHook(() => useClassifier())
+
+ const p1 = result.current.classifyOne('prompt-A')
+ const p2 = result.current.classifyOne('prompt-B')
+
+ await waitFor(() => expect(postedMessages.length).toBe(1))
+
+ // Reject task 1 via classify_error.
+ capturedListener({ data: { type: 'classify_error', taskId: postedMessages[0].taskId, message: 'boom' } })
+ await expect(p1).rejects.toThrow('boom')
+
+ // Task 2 should still post and resolve.
+ await waitFor(() => expect(postedMessages.length).toBe(2))
+ dispatchDone(postedMessages[1].taskId, 'LIKELY | recovered')
+ const r2 = await p2
+ expect(r2.raw).toBe('LIKELY | recovered')
+ })
+
+ it('rejects pending tasks when the hook unmounts', async () => {
+ const { result, unmount } = renderHook(() => useClassifier())
+
+ const p1 = result.current.classifyOne('prompt-pending')
+ await waitFor(() => expect(postedMessages.length).toBe(1))
+
+ // Mid-flight: unmount.
+ act(() => unmount())
+
+ await expect(p1).rejects.toThrow(/unmounted/)
+ })
+})
diff --git a/src/hooks/useIsMobile.js b/src/hooks/useIsMobile.js
new file mode 100644
index 0000000..a5fb4ee
--- /dev/null
+++ b/src/hooks/useIsMobile.js
@@ -0,0 +1,21 @@
+import { useEffect, useState } from 'react'
+
+export const MOBILE_BREAKPOINT_PX = 820
+
+// matchMedia (not 'resize'): iOS Safari fires 'resize' inconsistently on
+// rotation; matchMedia.change is the reliable signal. Also catches iPad
+// split-screen and browser-window mode switches without a manual resize.
+export function useIsMobile() {
+ const query = `(max-width: ${MOBILE_BREAKPOINT_PX}px)`
+ const [isMobile, setIsMobile] = useState(() =>
+ typeof window !== 'undefined' && window.matchMedia(query).matches
+ )
+ useEffect(() => {
+ const mq = window.matchMedia(query)
+ const onChange = (e) => setIsMobile(e.matches)
+ mq.addEventListener('change', onChange)
+ return () => mq.removeEventListener('change', onChange)
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [])
+ return isMobile
+}
diff --git a/src/workers/nlp.worker.js b/src/workers/nlp.worker.js
index fc93e77..d541141 100644
--- a/src/workers/nlp.worker.js
+++ b/src/workers/nlp.worker.js
@@ -130,6 +130,35 @@ self.onmessage = async (event) => {
return
}
+ if (type === 'translate') {
+ if (!engine) {
+ self.postMessage({ type: 'translate_error', taskId, message: 'Engine not loaded' })
+ return
+ }
+ try {
+ const t0 = Date.now()
+ if (typeof engine.resetChat === 'function') {
+ try { await engine.resetChat() } catch { /* best effort */ }
+ }
+ // Translation typically needs more headroom than classification (one
+ // verdict word + reason fits in 80; a paraphrased clinical sentence
+ // can run 100-200 tokens for verbose languages). Same low temperature
+ // since we want fidelity, not creativity.
+ const request = {
+ messages: [{ role: 'user', content: prompt }],
+ max_tokens: 200,
+ temperature: 0.1,
+ }
+ if (isThinkingModel) request.extra_body = { enable_thinking: false }
+ const reply = await engine.chat.completions.create(request)
+ const raw = reply.choices?.[0]?.message?.content ?? ''
+ self.postMessage({ type: 'translate_done', taskId, raw, latencyMs: Date.now() - t0 })
+ } catch (err) {
+ self.postMessage({ type: 'translate_error', taskId, message: err?.message ?? String(err) })
+ }
+ return
+ }
+
if (type === 'classify') {
if (!engine) {
self.postMessage({ type: 'classify_error', taskId, message: 'Engine not loaded' })
From f99743b11fb128dee3dab7ab66354b7bfe978adf Mon Sep 17 00:00:00 2001
From: John Orgera <65687576+johnoooh@users.noreply.github.com>
Date: Thu, 7 May 2026 01:24:22 -0400
Subject: [PATCH 31/31] chore(phase-3): clear lint diagnostics introduced by
this PR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
PR #3 added 4 new lint hits on top of main's pre-existing baseline.
All trivial — fixing them keeps the CI lint output clean for future
reviewers (the workflow runs lint as continue-on-error so they don't
block, but fewer ignorable lines is fewer ignorable lines).
- ClassificationHarness.jsx:36 — setConcurrency unused since the
concurrency dropdown was removed (serialization happens in the
hook now). Drop the setter, keep the value as a const.
- ResultCard.jsx:126 — showFit unused since the "Why this might or
might not fit you" section was dropped. Drop the var; comment
notes the path back if a fine-tuned model lets us re-introduce.
- ResultsList.jsx:75 — wrap allTrials in useMemo. react-query keeps
data ref stable across non-data renders so the memo identity is
stable too; without the memo, every render produced a new array
and effect dep arrays comparing against allTrials would have
thrashed (the actual classify trigger effect depends on a derived
trialKeyAll string so this was cosmetic, but cleaner this way).
- useClassifier.js:73-74 — exhaustive-deps disable on the
classifyOne/translateOne useCallbacks. runTask only closes over
refs (stable); the linter can't see through that.
Lint count: 29 → 24 (14 errors, 10 warnings — one fewer than main's
baseline). All remaining are pre-existing.
---
src/components/ClassificationHarness.jsx | 6 +++++-
src/components/ResultCard.jsx | 7 ++++---
src/components/ResultsList.jsx | 8 +++++++-
src/hooks/useClassifier.js | 6 ++++++
4 files changed, 22 insertions(+), 5 deletions(-)
diff --git a/src/components/ClassificationHarness.jsx b/src/components/ClassificationHarness.jsx
index 0a5bdd5..0c2cb67 100644
--- a/src/components/ClassificationHarness.jsx
+++ b/src/components/ClassificationHarness.jsx
@@ -33,7 +33,11 @@ export default function ClassificationHarness() {
const [userDesc, setUserDesc] = useState(USER_PRESETS[0].text)
const [promptTemplate, setPromptTemplate] = useState(DEFAULT_CLASSIFY_PROMPT)
const [trialsJson, setTrialsJson] = useState(JSON.stringify(SAMPLE_TRIALS, null, 2))
- const [concurrency, setConcurrency] = useState(3)
+ // Concurrency was a UI dropdown until we serialized at the hook level
+ // (WebLLM engine is single-threaded). Kept as a constant so the worker
+ // loop still controls fan-out at the harness level — the real
+ // serialization happens in useClassifier's promise chain.
+ const concurrency = 3
const [eligMax, setEligMax] = useState(1500)
const [translateFirst, setTranslateFirst] = useState(false)
const [translatedDesc, setTranslatedDesc] = useState(null)
diff --git a/src/components/ResultCard.jsx b/src/components/ResultCard.jsx
index 49fd93c..e9cc060 100644
--- a/src/components/ResultCard.jsx
+++ b/src/components/ResultCard.jsx
@@ -119,11 +119,12 @@ export default function ResultCard({
: 'bg-white border border-parchment-400 rounded-lg p-5 mb-3 max-w-3xl'
const sumState = simplification?.summarize
- const fitState = simplification?.fit
-
+ // fitState/showFit removed when the "Why this might or might not fit you"
+ // section was dropped — Gemma 2B's accuracy on the fit narrative wasn't
+ // reliable enough to ship. Re-introduce both if the fit section comes
+ // back behind a fine-tuned model.
const showPlainLanguage = sumState && sumState.status !== 'error'
const showFallbackHint = sumState?.status === 'error'
- const showFit = fitState && fitState.status !== 'error' && fitState.text
return (
diff --git a/src/components/ResultsList.jsx b/src/components/ResultsList.jsx
index c12a4d9..12813dd 100644
--- a/src/components/ResultsList.jsx
+++ b/src/components/ResultsList.jsx
@@ -72,7 +72,13 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
extractedFields,
})
- const allTrials = data?.pages.flatMap(p => p.trials) ?? []
+ // Memoized so effect dep arrays comparing against allTrials don't churn
+ // every render — react-query returns the same `data` ref while data is
+ // unchanged, so memo identity is stable across non-data renders.
+ const allTrials = useMemo(
+ () => data?.pages.flatMap(p => p.trials) ?? [],
+ [data]
+ )
const isMobile = useIsMobile()
const [selectedNctId, setSelectedNctId] = useState(null)
diff --git a/src/hooks/useClassifier.js b/src/hooks/useClassifier.js
index e899970..69042dd 100644
--- a/src/hooks/useClassifier.js
+++ b/src/hooks/useClassifier.js
@@ -70,7 +70,13 @@ export function useClassifier() {
return next
}
+ // runTask only closes over refs (pendingRef, chainRef, taskIdRef, detachRef)
+ // which are stable across renders, so it's safe to omit from useCallback
+ // deps. The exhaustive-deps lint can't see through this because runTask
+ // is defined in the function body each render.
+ // eslint-disable-next-line react-hooks/exhaustive-deps
const classifyOne = useCallback((prompt) => runTask('classify', 'classify', prompt), [])
+ // eslint-disable-next-line react-hooks/exhaustive-deps
const translateOne = useCallback((prompt) => runTask('translate', 'translate', prompt), [])
return { classifyOne, translateOne }