From 179501f867e22076a21834bb47f34734f5c7aedc Mon Sep 17 00:00:00 2001 From: John Orgera <65687576+johnoooh@users.noreply.github.com> Date: Wed, 6 May 2026 21:32:51 -0400 Subject: [PATCH 01/31] feat(phase-3): wire classification harness to live on-device model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements the harness validation rig from Handoff Phase 3, ported from the standalone HTML file into the React app so it reuses the already-loaded worker (no second model download needed). - worker: new 'classify' task type. One-shot completion, low max_tokens (80), temperature 0.1, KV-cache reset between calls. Returns { type: 'classify_done', taskId, raw, latencyMs } or 'classify_error'. - useClassifier hook: thin wrapper that posts 'classify' to the shared worker, tracks pending tasks by taskId, resolves with raw + latency. Caller parses the verdict. - ClassificationHarness component: ports the standalone harness UI to React + Tailwind. Inputs: user description, prompt template, trials JSON (fixture preset), concurrency (1/2/3/5), eligibility max chars. Renders results table with verdict pill (LIKELY/POSSIBLE/UNLIKELY/ PARSE_FAIL), latency, raw output, expected vs actual checkmark. Stats: parse rate, avg/max latency, agreement. - App.jsx: dev-only ?test=classify route, lazy-loaded so production bundle stays unaffected. Pass criteria from Handoff (parse ≥90%, avg latency <1.5s, agreement ≥80%) shown inline so the user can validate before deciding to wire classification into the actual results UI. This is the validation gate, not the in-app classification pipeline itself. Once the harness numbers look good, a follow-up PR adds classifyAll() to ResultsList and the fit meter to TriageRow. 
--- src/App.jsx | 14 +- src/components/ClassificationHarness.jsx | 420 +++++++++++++++++++++++ src/hooks/useClassifier.js | 49 +++ src/workers/nlp.worker.js | 28 ++ 4 files changed, 510 insertions(+), 1 deletion(-) create mode 100644 src/components/ClassificationHarness.jsx create mode 100644 src/hooks/useClassifier.js diff --git a/src/App.jsx b/src/App.jsx index 83666b7..b4ef669 100644 --- a/src/App.jsx +++ b/src/App.jsx @@ -18,12 +18,15 @@ const NLPTestPanel = import.meta.env.DEV const ProdScenarioTestPanel = import.meta.env.DEV ? lazy(() => import('./components/ProdScenarioTestPanel')) : null +const ClassificationHarness = import.meta.env.DEV + ? lazy(() => import('./components/ClassificationHarness')) + : null function getTestRoute() { if (typeof window === 'undefined') return null if (!import.meta.env.DEV) return null const t = new URLSearchParams(window.location.search).get('test') - return t === 'nlp' || t === 'scenarios' ? t : null + return t === 'nlp' || t === 'scenarios' || t === 'classify' ? t : null } function IrisApp() { @@ -45,6 +48,15 @@ function IrisApp() { ) } + if (testRoute === 'classify' && ClassificationHarness) { + return ( +
+ Loading classification harness…
}> + + + + ) + } if (testRoute === 'scenarios' && ProdScenarioTestPanel) { // ProdScenarioTestPanel calls fetch directly, so it doesn't need a query // client. IrisApp is already inside the App() QueryClientProvider, so no diff --git a/src/components/ClassificationHarness.jsx b/src/components/ClassificationHarness.jsx new file mode 100644 index 0000000..c220dd4 --- /dev/null +++ b/src/components/ClassificationHarness.jsx @@ -0,0 +1,420 @@ +import { useState, useEffect } from 'react' +import { useNLP } from '../hooks/useNLP' +import { useClassifier } from '../hooks/useClassifier' +import { NLP_MODELS, resolveModelKey } from '../utils/nlpModels' + +const SAMPLE_TRIALS = [ + { + nctId: 'NCT05952557', + title: 'Phase IIIb Study of Ribociclib + Endocrine Therapy in Early Breast Cancer', + eligibility: 'Inclusion: Adult female, ≥18 years. HR-positive, HER2-negative early breast cancer. Completed definitive surgery. Postmenopausal status confirmed. ECOG 0-1. Adequate organ function. Exclusion: Prior CDK4/6 inhibitor. Pregnancy or breastfeeding. Active second malignancy.', + expected: 'LIKELY', + }, + { + nctId: 'NCT06104020', + title: 'Sacituzumab Govitecan in Metastatic Triple-Negative Breast Cancer', + eligibility: 'Inclusion: Adult, any sex. Histologically confirmed metastatic triple-negative breast cancer (ER<1%, PR<1%, HER2-negative). At least one prior line of systemic therapy in metastatic setting. ECOG 0-2. Measurable disease per RECIST 1.1. Exclusion: Active CNS metastases. Prior topoisomerase I inhibitor.', + expected: 'POSSIBLE', + }, + { + nctId: 'NCT05887492', + title: 'Adaptive Radiation Boost in Locally Advanced HER2+ Breast Cancer', + eligibility: 'Inclusion: Adult female. HER2-positive breast cancer confirmed by IHC 3+ or FISH-positive. Stage II-III disease. Completed neoadjuvant chemotherapy. ECOG 0-1. Exclusion: Prior radiation to chest. 
Pregnancy.', + expected: 'POSSIBLE', + }, + { + nctId: 'NCT06221340', + title: 'Aerobic Exercise During Adjuvant Chemo for Breast Cancer Survivors', + eligibility: 'Inclusion: Adult, any sex. Breast cancer, any stage. Currently receiving or scheduled for adjuvant chemotherapy. Cleared by oncologist for moderate exercise. Exclusion: Cardiac contraindications.', + expected: 'LIKELY', + }, + { + nctId: 'NCT04123456', + title: 'Pembrolizumab in Advanced Non-Small Cell Lung Cancer', + eligibility: 'Inclusion: Adult. Histologically confirmed advanced NSCLC. PD-L1 expression ≥50%. ECOG 0-1. Exclusion: Active autoimmune disease. Prior immunotherapy.', + expected: 'UNLIKELY', + }, + { + nctId: 'NCT05123987', + title: 'Targeted Therapy in Pediatric Acute Lymphoblastic Leukemia', + eligibility: 'Inclusion: Pediatric patients aged 2-17 years. Newly diagnosed ALL. Exclusion: Adults. Prior chemotherapy.', + expected: 'UNLIKELY', + }, +] + +const DEFAULT_PROMPT = `You are evaluating clinical trial fit. 
+ +User: {{user}} +Trial title: {{title}} +Eligibility (excerpt): {{eligibility}} + +Reply on one line, exactly: VERDICT | one-sentence reason +where VERDICT is LIKELY, POSSIBLE, or UNLIKELY.` + +const DEFAULT_USER_DESC = "I'm 58 years old with breast cancer in Boston" + +function parseVerdict(raw) { + if (!raw || typeof raw !== 'string') return { verdict: 'PARSE_FAIL', reason: '(empty output)' } + const m = raw.match(/^\s*(LIKELY|POSSIBLE|UNLIKELY)\s*[|:\-—]\s*(.+?)\s*$/im) + if (m) return { verdict: m[1].toUpperCase(), reason: m[2].trim() } + const w = raw.match(/\b(LIKELY|POSSIBLE|UNLIKELY)\b/i) + if (w) { + return { + verdict: w[1].toUpperCase(), + reason: raw.replace(w[0], '').replace(/^[\s|:\-—]+/, '').trim() || '(no reason)', + } + } + return { verdict: 'PARSE_FAIL', reason: raw.slice(0, 120) } +} + +const VERDICT_STYLES = { + LIKELY: 'bg-signal-good-bg text-signal-good', + POSSIBLE: 'bg-signal-warn-bg text-signal-warn', + UNLIKELY: 'bg-parchment-200 text-parchment-700', + PARSE_FAIL: 'bg-signal-bad-bg text-signal-bad', + PENDING: 'bg-parchment-100 text-parchment-700', +} + +export default function ClassificationHarness() { + const [modelKey] = useState(() => + resolveModelKey(typeof window !== 'undefined' ? window.location.search : '') + ) + const model = NLP_MODELS[modelKey] + const { status, progress, error, load, webGPUSupported } = useNLP() + const { classifyOne } = useClassifier() + + const [userDesc, setUserDesc] = useState(DEFAULT_USER_DESC) + const [promptTemplate, setPromptTemplate] = useState(DEFAULT_PROMPT) + const [trialsJson, setTrialsJson] = useState(JSON.stringify(SAMPLE_TRIALS, null, 2)) + const [concurrency, setConcurrency] = useState(3) + const [eligMax, setEligMax] = useState(1500) + const [results, setResults] = useState([]) + const [running, setRunning] = useState(false) + const [startT, setStartT] = useState(0) + const [, setTick] = useState(0) + + // Lightweight ticker so elapsed time updates while a run is in flight. 
+ useEffect(() => { + if (!running) return + const id = setInterval(() => setTick(t => t + 1), 250) + return () => clearInterval(id) + }, [running]) + + function getProgressLabel() { + if (!progress) return 'Loading model…' + return progress.text || `Loading model… ${Math.round((progress.progress ?? 0) * 100)}%` + } + + async function run() { + let trials + try { + trials = JSON.parse(trialsJson) + if (!Array.isArray(trials)) throw new Error('Not an array') + } catch (e) { + alert('Trials JSON is invalid: ' + e.message) + return + } + + setRunning(true) + setStartT(performance.now()) + const initial = trials.map(trial => ({ trial, status: 'PENDING' })) + setResults(initial) + + const queue = trials.map((trial, idx) => ({ idx, trial })) + const workersN = Math.min(concurrency, trials.length) + + async function worker() { + while (queue.length) { + const { idx, trial } = queue.shift() + const elig = (trial.eligibility || '').slice(0, eligMax) + const prompt = promptTemplate + .replace('{{user}}', userDesc) + .replace('{{title}}', trial.title || trial.briefTitle || '') + .replace('{{eligibility}}', elig) + try { + const { raw, latencyMs } = await classifyOne(prompt) + const parsed = parseVerdict(raw) + setResults(prev => { + const next = [...prev] + next[idx] = { trial, status: 'DONE', raw, latencyMs, ...parsed } + return next + }) + } catch (err) { + setResults(prev => { + const next = [...prev] + next[idx] = { + trial, + status: 'DONE', + raw: '', + latencyMs: 0, + verdict: 'PARSE_FAIL', + reason: err?.message ?? 'classify error', + } + return next + }) + } + } + } + + await Promise.all(Array.from({ length: workersN }, worker)) + setRunning(false) + } + + function reset() { + setTrialsJson(JSON.stringify(SAMPLE_TRIALS, null, 2)) + setResults([]) + } + + // ───────── stats ───────── + const done = results.filter(r => r.status === 'DONE') + const lats = done.map(r => r.latencyMs).filter(n => n != null) + const avgLat = lats.length ? 
Math.round(lats.reduce((a, b) => a + b, 0) / lats.length) : 0 + const maxLat = lats.length ? Math.round(Math.max(...lats)) : 0 + const parseFails = done.filter(r => r.verdict === 'PARSE_FAIL').length + const parseRate = done.length ? Math.round(((done.length - parseFails) / done.length) * 100) : 0 + const elapsed = startT ? ((performance.now() - startT) / 1000).toFixed(1) : '0.0' + const withExpected = done.filter(r => r.trial.expected) + const matches = withExpected.filter(r => r.verdict === r.trial.expected).length + const agreementPct = withExpected.length ? Math.round((matches / withExpected.length) * 100) : null + + const canRun = status === 'ready' && !running + + return ( +
+

+ Classification harness +

+

+ Validate the proposed Stage-1 classifier (LIKELY / POSSIBLE / UNLIKELY) against real + ClinicalTrials.gov payloads using the on-device {model.label}. Pass criteria from the + Handoff: parse rate ≥ 90%, avg latency < 1.5s, agreement ≥ 80%. +

+ +
+
+
+
model
+
+ {model.label} ({model.sizeLabel}) · status:{' '} + + {status} + + {status === 'downloading' && progress && ( + · {Math.round((progress.progress ?? 0) * 100)}% + )} +
+ {status === 'downloading' && ( +

{getProgressLabel()}

+ )} + {!webGPUSupported && ( +

WebGPU unavailable in this browser.

+ )} + {error &&

{error}

} +
+ {status !== 'ready' && status !== 'downloading' && webGPUSupported && ( + + )} +
+
+ +
+

Inputs

+
+
+ +