diff --git a/docs/design-references-shared/README.md b/docs/design-references-shared/README.md
new file mode 100644
index 0000000..489ae34
--- /dev/null
+++ b/docs/design-references-shared/README.md
@@ -0,0 +1,17 @@
+# iris-shared.jsx (formerly shared/iris-shared.jsx) — design reference, not source
+
+Reference implementations from the original Claude.ai design exploration.
+Components in `iris-shared.jsx` (`IrisHeader`, `IrisSearchBar`, `LocalAIBadge`,
+`FitMeter`, `StatusPill`, `ActionRow`, `StreamingText`, …) were ported into
+the live React app under `src/components/` and `src/utils/`. The versions
+here are kept verbatim so a future reader can compare the live
+implementations against the original prototype.
+
+**Do not import from `iris-shared.jsx` in `src/`.** It runs against a
+Babel-standalone environment in `IRIS Triage.html` and uses inline-style
+patterns the live app intentionally moved away from (the live app uses
+Tailwind utility classes on top of CSS custom properties from `styles/tokens.css`).
+
+If you're trying to "fix" or "consolidate" this file: stop. Edit the live
+component under `src/components/` instead. The existence of this file is
+documentation, not duplication.
diff --git a/shared/iris-shared.jsx b/docs/design-references-shared/iris-shared.jsx
similarity index 100%
rename from shared/iris-shared.jsx
rename to docs/design-references-shared/iris-shared.jsx
diff --git a/src/App.jsx b/src/App.jsx
index 83666b7..b4ef669 100644
--- a/src/App.jsx
+++ b/src/App.jsx
@@ -18,12 +18,15 @@ const NLPTestPanel = import.meta.env.DEV
const ProdScenarioTestPanel = import.meta.env.DEV
? lazy(() => import('./components/ProdScenarioTestPanel'))
: null
+// Dev-only harness: the lazy import is created only when import.meta.env.DEV
+// is truthy. Otherwise the binding is null, and the ?test=classify branch in
+// IrisApp (which checks this binding) is skipped.
+const ClassificationHarness = import.meta.env.DEV
+ ? lazy(() => import('./components/ClassificationHarness'))
+ : null
function getTestRoute() {
if (typeof window === 'undefined') return null
if (!import.meta.env.DEV) return null
const t = new URLSearchParams(window.location.search).get('test')
- return t === 'nlp' || t === 'scenarios' ? t : null
+ return t === 'nlp' || t === 'scenarios' || t === 'classify' ? t : null
}
function IrisApp() {
@@ -45,6 +48,15 @@ function IrisApp() {
)
}
+ if (testRoute === 'classify' && ClassificationHarness) {
+ return (
+
+ Loading classification harness…
}>
+
+
+
+ )
+ }
if (testRoute === 'scenarios' && ProdScenarioTestPanel) {
// ProdScenarioTestPanel calls fetch directly, so it doesn't need a query
// client. IrisApp is already inside the App() QueryClientProvider, so no
diff --git a/src/components/ClassificationHarness.fixtures.js b/src/components/ClassificationHarness.fixtures.js
new file mode 100644
index 0000000..c33b3bc
--- /dev/null
+++ b/src/components/ClassificationHarness.fixtures.js
@@ -0,0 +1,332 @@
+// Fixture data for the dev-only Classification Harness (?test=classify).
+// Lives next to the component but split out because the trial array is
+// 300+ lines and made the harness file hard to navigate when iterating
+// on prompts vs data.
+//
+// `outOfScope: true` flags trials the CT.gov API would NOT return for
+// a breast-cancer search — kept in the fixture as wrong-condition
+// stress tests, but the harness's "production-realistic agreement"
+// toggle excludes them from the headline metric.
+
+export const SAMPLE_TRIALS = [
+ {
+ nctId: 'NCT05952557',
+ title: 'Phase IIIb Study of Ribociclib + Endocrine Therapy in Early Breast Cancer',
+ eligibility: 'Inclusion: Adult female, ≥18 years. HR-positive, HER2-negative early breast cancer. Completed definitive surgery. Postmenopausal status confirmed. ECOG 0-1. Adequate organ function. Exclusion: Prior CDK4/6 inhibitor. Pregnancy or breastfeeding. Active second malignancy.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT06104020',
+ title: 'Sacituzumab Govitecan in Metastatic Triple-Negative Breast Cancer',
+ eligibility: 'Inclusion: Adult, any sex. Histologically confirmed metastatic triple-negative breast cancer (ER<1%, PR<1%, HER2-negative). At least one prior line of systemic therapy in metastatic setting. ECOG 0-2. Measurable disease per RECIST 1.1. Exclusion: Active CNS metastases. Prior topoisomerase I inhibitor.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT05887492',
+ title: 'Adaptive Radiation Boost in Locally Advanced HER2+ Breast Cancer',
+ eligibility: 'Inclusion: Adult female. HER2-positive breast cancer confirmed by IHC 3+ or FISH-positive. Stage II-III disease. Completed neoadjuvant chemotherapy. ECOG 0-1. Exclusion: Prior radiation to chest. Pregnancy.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT06221340',
+ title: 'Aerobic Exercise During Adjuvant Chemo for Breast Cancer Survivors',
+ eligibility: 'Inclusion: Adult, any sex. Breast cancer, any stage. Currently receiving or scheduled for adjuvant chemotherapy. Cleared by oncologist for moderate exercise. Exclusion: Cardiac contraindications.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT04123456',
+ title: 'Pembrolizumab in Advanced Non-Small Cell Lung Cancer',
+ eligibility: 'Inclusion: Adult. Histologically confirmed advanced NSCLC. PD-L1 expression ≥50%. ECOG 0-1. Exclusion: Active autoimmune disease. Prior immunotherapy.',
+ expected: 'UNLIKELY',
+ outOfScope: true, // NSCLC — wouldn't appear in a breast-cancer API search
+ },
+ {
+ nctId: 'NCT05123987',
+ title: 'Targeted Therapy in Pediatric Acute Lymphoblastic Leukemia',
+ eligibility: 'Inclusion: Pediatric patients aged 2-17 years. Newly diagnosed ALL. Exclusion: Adults. Prior chemotherapy.',
+ expected: 'UNLIKELY',
+ outOfScope: true, // Pediatric ALL — wouldn't appear in a breast-cancer API search
+ },
+
+ // ─── Subtype-gated breast cancer trials — POSSIBLE without confirmed subtype ───
+ {
+ nctId: 'NCT05300100',
+ title: 'Tucatinib + Trastuzumab in HER2-Positive Metastatic Breast Cancer',
+ eligibility: 'Inclusion: Adult, any sex, ≥18 years. Histologically confirmed HER2-positive metastatic breast cancer (IHC 3+ or FISH-amplified). At least 2 prior HER2-directed therapies. ECOG 0-1. Exclusion: Untreated brain metastases. Prior tucatinib.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT05400201',
+ title: 'Olaparib Maintenance in BRCA-Mutated HER2-Negative Breast Cancer',
+ eligibility: 'Inclusion: Adult female. HER2-negative breast cancer with germline BRCA1 or BRCA2 mutation (confirmed by central testing). High-risk early disease following adjuvant chemotherapy. Postmenopausal or premenopausal with ovarian suppression. Exclusion: Prior PARP inhibitor.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT05511223',
+ title: 'CDK4/6 Inhibitor Switch in Hormone-Receptor-Positive Advanced Breast Cancer',
+ eligibility: 'Inclusion: Adult women, postmenopausal. HR-positive, HER2-negative advanced or metastatic breast cancer. Disease progression on a prior CDK4/6 inhibitor. ECOG 0-2.',
+ expected: 'POSSIBLE',
+ },
+
+ // ─── Strong matches for a 58yo with breast cancer ───
+ {
+ nctId: 'NCT05633445',
+ title: 'Cognitive Behavioral Therapy for Cancer-Related Fatigue',
+ eligibility: 'Inclusion: Adults ≥18 years with any solid tumor diagnosis (breast, colon, lung, prostate, etc.). Currently in active treatment or within 5 years of treatment completion. Self-reported fatigue ≥4 on a 0-10 scale. Exclusion: Severe untreated depression. Inability to attend weekly sessions.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT05755677',
+ title: 'Lymphedema Surveillance Program After Breast Cancer Surgery',
+ eligibility: 'Inclusion: Adult female ≥18 years. History of breast cancer treated with axillary surgery (sentinel lymph node biopsy or axillary dissection). Within 3 years of surgery. Exclusion: Pre-existing lymphedema. Current breast cancer recurrence.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT05822334',
+ title: 'Mindfulness-Based Stress Reduction for Breast Cancer Survivors',
+ eligibility: 'Inclusion: Adult women ≥21 years. Diagnosed with breast cancer (any stage). Completed primary treatment within the past 5 years OR currently on adjuvant endocrine therapy. Exclusion: Active psychosis. Prior MBSR participation.',
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT05901128',
+ title: 'Vaginal Estrogen Safety Study in Postmenopausal Breast Cancer Survivors',
+ eligibility: 'Inclusion: Postmenopausal women ages 45-75 with a history of HR-positive or HR-negative breast cancer. Disease-free for ≥1 year. Genitourinary symptoms of menopause. Stable on aromatase inhibitor or tamoxifen, or treatment-free. Exclusion: Current metastatic disease.',
+ expected: 'LIKELY',
+ },
+
+ // ─── Wrong condition / wrong demographic — clear UNLIKELY ───
+ {
+ nctId: 'NCT04567890',
+ title: 'Pembrolizumab in Advanced Melanoma',
+ eligibility: 'Inclusion: Adults with histologically confirmed unresectable Stage III or Stage IV melanoma. ECOG 0-1. No prior systemic therapy for advanced disease. Exclusion: Active autoimmune disease.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+ {
+ nctId: 'NCT04678901',
+ title: 'Apixaban vs. Warfarin in Atrial Fibrillation',
+ eligibility: 'Inclusion: Adults ≥18 years with non-valvular atrial fibrillation. CHA2DS2-VASc score ≥2. Exclusion: Mechanical heart valve. Active bleeding.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+ {
+ nctId: 'NCT04789012',
+ title: 'GLP-1 Agonist for Weight Management in Type 2 Diabetes',
+ eligibility: 'Inclusion: Adults 18-75 with Type 2 diabetes mellitus. BMI ≥30. HbA1c 7.0-10.0%. Exclusion: Type 1 diabetes. Active malignancy within 5 years. History of pancreatitis.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+ {
+ nctId: 'NCT04890123',
+ title: 'Robotic Prostatectomy Outcomes in Localized Prostate Cancer',
+ eligibility: 'Inclusion: Men ≥40 years with biopsy-confirmed clinically localized prostate cancer (T1-T2). Candidate for radical prostatectomy. Exclusion: Prior pelvic surgery or radiation.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+ {
+ nctId: 'NCT04901234',
+ title: 'Pediatric Vaccine Immunogenicity Study',
+ eligibility: 'Inclusion: Healthy children aged 6 months to 5 years. Up to date on routine immunizations. Exclusion: Immunocompromised. Recent illness within 14 days.',
+ expected: 'UNLIKELY',
+ outOfScope: true,
+ },
+
+ // ─── Edge cases — should challenge the model ───
+ {
+ nctId: 'NCT05012345',
+ title: 'Palliative Care Integration in Patients with Advanced Solid Tumors',
+ eligibility: 'Inclusion: Adults ≥18 years with advanced (Stage IV) solid tumor of any primary site (breast, lung, GI, GU, GYN). Estimated prognosis 6-24 months. ECOG 0-3. Exclusion: Currently enrolled in hospice.',
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT05123450',
+ title: 'Premenopausal Breast Cancer: Ovarian Function Suppression Trial',
+ eligibility: 'Inclusion: Premenopausal women ages 18-45 with newly diagnosed HR-positive early breast cancer. Confirmed premenopausal by FSH and estradiol levels. Exclusion: Postmenopausal status. Prior ovarian suppression therapy.',
+ expected: 'UNLIKELY',
+ },
+
+ // ─── Realistic-length eligibility (~2-3.5kB each) — stress-tests how the
+ // model handles formal CT.gov noise and how truncation affects accuracy.
+ // Try these with eligMax = 800 vs 3000 vs 6000 to see the trade-off.
+ {
+ nctId: 'NCT-LONG-01',
+ title: 'Phase II Study of Sacituzumab Govitecan-hziy in Patients with HR-Positive, HER2-Negative Metastatic Breast Cancer After Endocrine Therapy and CDK4/6 Inhibitor',
+ eligibility: `Inclusion Criteria:
+
+1. Female participants ≥18 years of age at the time of signing informed consent.
+2. Histologically or cytologically confirmed adenocarcinoma of the breast that is metastatic or locally advanced and not amenable to curative resection or radiotherapy.
+3. Documentation of estrogen receptor (ER)-positive (≥1% staining by IHC) and/or progesterone receptor (PR)-positive (≥1% staining by IHC) tumor status, in accordance with ASCO/CAP guidelines.
+4. Documentation of HER2-negative status defined as IHC 0, IHC 1+, or IHC 2+ with negative in situ hybridization (ISH), per ASCO/CAP guidelines.
+5. Disease progression on or after at least one prior CDK4/6 inhibitor (palbociclib, ribociclib, or abemaciclib) administered for advanced or metastatic disease, in combination with an aromatase inhibitor or fulvestrant.
+6. Disease progression on or after at least one and no more than two prior endocrine therapies (e.g., aromatase inhibitor, fulvestrant, tamoxifen) for advanced or metastatic disease.
+7. No more than one prior chemotherapy regimen for metastatic disease.
+8. Postmenopausal status, OR premenopausal/perimenopausal women who agree to receive concurrent ovarian function suppression with a luteinizing hormone-releasing hormone (LHRH) agonist throughout study treatment.
+9. Measurable disease per RECIST v1.1, or non-measurable bone-only disease assessable per protocol-specified criteria.
+10. ECOG performance status 0 or 1.
+11. Adequate organ function:
+ - Absolute neutrophil count (ANC) ≥1.5 × 10^9/L
+ - Platelets ≥100 × 10^9/L
+ - Hemoglobin ≥9.0 g/dL (transfusion permitted)
+ - Total bilirubin ≤1.5 × ULN (≤3 × ULN for participants with documented Gilbert syndrome)
+ - AST and ALT ≤2.5 × ULN (≤5 × ULN if liver metastases present)
+ - Creatinine clearance ≥50 mL/min by Cockcroft-Gault equation
+ - INR and aPTT ≤1.5 × ULN unless on anticoagulants
+12. Resolution of all acute toxic effects of prior anti-cancer therapy or surgical procedures to NCI CTCAE v5.0 Grade ≤1 (except alopecia and Grade 2 neuropathy).
+13. Willingness to provide tumor tissue (archival or fresh biopsy) for biomarker analyses.
+
+Exclusion Criteria:
+
+1. Prior treatment with sacituzumab govitecan or any other Trop-2-directed therapy.
+2. Prior treatment with an antibody-drug conjugate containing a topoisomerase I inhibitor payload (e.g., trastuzumab deruxtecan).
+3. Active CNS metastases. Participants with previously treated, asymptomatic CNS metastases are eligible if clinically stable for ≥4 weeks off corticosteroids and anticonvulsants.
+4. Leptomeningeal disease.
+5. Known active infection requiring systemic therapy, including untreated HIV, active HBV (HBsAg positive or HBV DNA detectable), or active HCV (HCV RNA detectable).
+6. Significant cardiovascular disease, including: NYHA Class III or IV congestive heart failure, myocardial infarction or unstable angina within 6 months, uncontrolled arrhythmia, baseline QTcF >470 ms.
+7. History of another malignancy within 3 years, except for adequately treated non-melanoma skin cancer, in situ cervical or breast cancer, or low-risk localized prostate cancer on active surveillance.
+8. Known hypersensitivity to irinotecan or any component of the study drug formulation.
+9. Pregnant or breastfeeding women. Women of childbearing potential must agree to use highly effective contraception during the study and for 6 months after the last dose.
+10. Concurrent participation in another therapeutic clinical trial.
+11. Major surgery within 4 weeks prior to first dose.
+12. Live vaccines within 30 days prior to first dose.`,
+ expected: 'POSSIBLE',
+ },
+ {
+ nctId: 'NCT-LONG-02',
+ title: 'Randomized Phase III Trial of Adjuvant Endocrine Therapy ± Abemaciclib in Postmenopausal Women with HR-Positive, HER2-Negative, Node-Positive Early Breast Cancer at High Risk of Recurrence',
+ eligibility: `Inclusion Criteria:
+
+1. Female, postmenopausal at the time of randomization. Postmenopausal status defined as: (a) prior bilateral oophorectomy, (b) age ≥60 years, OR (c) age <60 with amenorrhea ≥12 months in the absence of chemotherapy, tamoxifen, or ovarian suppression AND FSH and estradiol in the postmenopausal range.
+2. Age 18 to 75 years inclusive at the time of consent.
+3. ECOG performance status of 0, 1, or 2.
+4. Histologically confirmed invasive breast carcinoma. Multicentric or multifocal disease is allowed if all foci meet eligibility.
+5. Hormone receptor-positive disease, defined as ≥1% of tumor cells staining positive for estrogen receptor and/or progesterone receptor by IHC, per ASCO/CAP guidelines.
+6. HER2-negative disease, defined as IHC 0, 1+, or 2+ with negative reflex ISH testing per ASCO/CAP guidelines.
+7. Stage II or III disease with high-risk pathologic features, defined as ≥1 of the following:
+ - ≥4 positive axillary lymph nodes, OR
+ - 1-3 positive axillary lymph nodes AND tumor size ≥5 cm, OR
+ - 1-3 positive axillary lymph nodes AND histologic grade 3, OR
+ - 1-3 positive axillary lymph nodes AND Ki-67 ≥20%
+8. Definitive surgical treatment of primary tumor with negative margins (lumpectomy with whole-breast irradiation OR mastectomy with or without post-mastectomy radiation per institutional standard).
+9. Completion of any neoadjuvant or adjuvant chemotherapy at least 21 days but no more than 16 months prior to randomization.
+10. Initiation of adjuvant endocrine therapy (aromatase inhibitor, with or without LHRH agonist) is permitted, but participants must not have received endocrine therapy for more than 12 weeks prior to randomization.
+11. Adequate organ function within 14 days of randomization:
+ - ANC ≥1.5 × 10^9/L
+ - Platelets ≥100 × 10^9/L
+ - Hemoglobin ≥10.0 g/dL
+ - Total bilirubin ≤1.5 × ULN
+ - AST/ALT ≤2.5 × ULN
+ - Creatinine clearance ≥50 mL/min
+12. Negative serum or urine pregnancy test for participants of childbearing potential.
+
+Exclusion Criteria:
+
+1. Stage IV (metastatic) breast cancer or evidence of distant metastases on staging imaging.
+2. Inflammatory breast cancer.
+3. Bilateral invasive breast cancer.
+4. Prior treatment with any CDK4/6 inhibitor in any setting.
+5. Prior anti-cancer therapy other than chemotherapy and locoregional therapy for the current breast cancer diagnosis.
+6. History of another malignancy within 5 years prior to randomization, except adequately treated non-melanoma skin cancer, in situ cervical cancer, or contralateral DCIS.
+7. Active or chronic hepatitis B or C infection, or known HIV infection.
+8. Significant uncontrolled cardiovascular disease: NYHA Class III/IV heart failure, myocardial infarction within 6 months, ventricular arrhythmia requiring treatment.
+9. History of interstitial lung disease or pneumonitis requiring corticosteroids.
+10. Major surgery (other than breast cancer surgery) within 28 days of randomization.
+11. Receiving strong CYP3A inhibitors or inducers within 14 days that cannot be discontinued.
+12. Inability to swallow oral medications or significant malabsorption.
+13. Pregnant or breastfeeding (premenopausal participants only — see inclusion criterion 1).`,
+ expected: 'LIKELY',
+ },
+ {
+ nctId: 'NCT-LONG-03',
+ title: 'Phase III Study of Pembrolizumab Plus Chemotherapy versus Chemotherapy Alone for First-Line Treatment of Metastatic Squamous Non-Small Cell Lung Cancer',
+ outOfScope: true,
+ eligibility: `Inclusion Criteria:
+
+1. Histologically or cytologically confirmed Stage IV squamous non-small cell lung cancer (NSCLC) per AJCC 8th edition.
+2. Male or female ≥18 years of age.
+3. No prior systemic therapy for metastatic NSCLC. Prior adjuvant or neoadjuvant chemotherapy is allowed if completed ≥6 months prior to enrollment.
+4. Measurable disease per RECIST v1.1.
+5. Provision of a tumor tissue sample (archival or fresh biopsy) adequate for PD-L1 IHC testing using the 22C3 pharmDx assay.
+6. ECOG performance status 0 or 1.
+7. Life expectancy ≥3 months.
+8. Adequate organ function within 10 days of randomization:
+ - ANC ≥1.5 × 10^9/L without G-CSF support
+ - Platelets ≥100 × 10^9/L without transfusion
+ - Hemoglobin ≥9.0 g/dL
+ - Total bilirubin ≤1.5 × ULN
+ - AST/ALT ≤2.5 × ULN (≤5 × ULN if liver involvement)
+ - Creatinine clearance ≥45 mL/min
+ - INR/aPTT ≤1.5 × ULN
+9. Female participants of childbearing potential and male participants with partners of childbearing potential must agree to use effective contraception throughout treatment and for 120 days after last dose.
+
+Exclusion Criteria:
+
+1. Histology of mixed small cell and non-small cell lung cancer, or predominantly non-squamous histology.
+2. Known sensitizing EGFR mutation, ALK rearrangement, ROS1 rearrangement, BRAF V600E mutation, or other actionable alteration for which an approved targeted therapy is the standard of care.
+3. Prior treatment with any PD-1, PD-L1, PD-L2, or CTLA-4 inhibitor.
+4. Active autoimmune disease requiring systemic immunosuppression within 2 years. Replacement therapy (e.g., thyroxine, insulin, physiologic corticosteroids) is permitted.
+5. History of pneumonitis requiring corticosteroids, or active pneumonitis.
+6. Active CNS metastases or carcinomatous meningitis. Participants with previously treated, asymptomatic CNS metastases stable for ≥4 weeks may be eligible.
+7. Active infection requiring systemic therapy.
+8. Known active HIV, HBV, or HCV infection.
+9. Live vaccine within 30 days of first dose.
+10. History of solid organ or allogeneic stem cell transplant.
+11. Pregnant or breastfeeding women.
+12. History of another malignancy within 3 years, except for adequately treated non-melanoma skin cancer or in situ disease.`,
+ expected: 'UNLIKELY',
+ },
+ {
+ nctId: 'NCT-LONG-04',
+ title: 'Multicenter Randomized Trial of Empagliflozin in Patients with Heart Failure with Preserved Ejection Fraction and Type 2 Diabetes',
+ outOfScope: true,
+ eligibility: `Inclusion Criteria:
+
+1. Adults aged 40 to 85 years at consent.
+2. Documented diagnosis of heart failure with preserved ejection fraction (HFpEF):
+ - Left ventricular ejection fraction (LVEF) ≥50% on echocardiogram within the past 12 months
+ - NYHA functional class II, III, or IV
+ - Elevated NT-proBNP ≥300 pg/mL (or ≥600 pg/mL if atrial fibrillation present)
+ - Structural heart disease on echocardiography (LV hypertrophy or left atrial enlargement) OR documented prior HF hospitalization
+3. Documented Type 2 diabetes mellitus (T2DM) per ADA criteria, with HbA1c 6.5% to 10.0% at screening.
+4. Stable background heart failure therapy for ≥4 weeks (diuretic if indicated; ACEi/ARB/ARNI per guideline; beta-blocker per guideline).
+5. eGFR ≥25 mL/min/1.73m^2 by CKD-EPI equation.
+6. Body mass index 20 to 45 kg/m^2.
+7. Able and willing to provide written informed consent and adhere to study procedures.
+
+Exclusion Criteria:
+
+1. Type 1 diabetes mellitus.
+2. History of diabetic ketoacidosis within 12 months.
+3. LVEF <50% on most recent echocardiogram.
+4. Acute decompensated heart failure requiring IV diuretics within 4 weeks of screening.
+5. Acute coronary syndrome, stroke, or transient ischemic attack within 90 days.
+6. Planned cardiac surgery, percutaneous coronary intervention, or device implantation within 90 days.
+7. Symptomatic hypotension or systolic blood pressure <100 mmHg at screening.
+8. Significant valvular heart disease (severe aortic stenosis, severe mitral regurgitation requiring surgery).
+9. Hypertrophic cardiomyopathy, infiltrative cardiomyopathy, or constrictive pericarditis.
+10. eGFR <25 mL/min/1.73m^2 or end-stage renal disease requiring dialysis.
+11. Known active malignancy requiring treatment within the past 12 months. Participants with a history of cancer who are disease-free for >12 months are eligible.
+12. Severe hepatic impairment (Child-Pugh C).
+13. Pregnancy or breastfeeding.
+14. Known hypersensitivity to SGLT2 inhibitors.
+15. Participation in another interventional clinical trial within 30 days.
+16. Life expectancy <12 months due to non-cardiovascular cause.`,
+ expected: 'UNLIKELY',
+ },
+]
+
+// Patient description presets for multilingual + edge-case validation. Same
+// 58yo woman with breast cancer in Boston, expressed in different languages
+// and registers (formal, terse, etc.) so we can stress-test the model's
+// understanding without changing the underlying clinical signal.
+export const USER_PRESETS = [
+ { id: 'en', label: 'English', text: "I'm 58 years old with breast cancer in Boston" },
+ { id: 'en-2', label: 'English (more detail)', text: "58-year-old woman in Boston, postmenopausal, recently diagnosed with breast cancer, looking for post-chemo treatment options" },
+ { id: 'es', label: 'Spanish (Español)', text: 'Tengo 58 años, vivo en Boston y tengo cáncer de mama' },
+ { id: 'es-2', label: 'Spanish (more detail)', text: 'Soy mujer de 58 años, posmenopáusica, vivo en Boston. Me diagnosticaron cáncer de mama y busco opciones de tratamiento después de quimioterapia.' },
+ { id: 'zh', label: 'Mandarin (中文)', text: '我58岁,住在波士顿,患有乳腺癌' },
+ { id: 'ar', label: 'Arabic (العربية)', text: 'أنا امرأة عمري 58 عامًا أعيش في بوسطن ومصابة بسرطان الثدي' },
+ { id: 'pt', label: 'Portuguese (Português)', text: 'Tenho 58 anos, moro em Boston e tenho câncer de mama' },
+ { id: 'fr', label: 'French (Français)', text: "J'ai 58 ans, je vis à Boston et j'ai un cancer du sein" },
+ { id: 'terse', label: 'Terse / fragments', text: '58F, BC, Boston' },
+]
+
diff --git a/src/components/ClassificationHarness.jsx b/src/components/ClassificationHarness.jsx
new file mode 100644
index 0000000..0c2cb67
--- /dev/null
+++ b/src/components/ClassificationHarness.jsx
@@ -0,0 +1,552 @@
+import { useState, useEffect } from 'react'
+import { useNLP } from '../hooks/useNLP'
+import { useClassifier } from '../hooks/useClassifier'
+import { NLP_MODELS, resolveModelKey } from '../utils/nlpModels'
+import { DEFAULT_CLASSIFY_PROMPT, parseVerdict } from '../utils/classifyTrial'
+import { SAMPLE_TRIALS, USER_PRESETS } from './ClassificationHarness.fixtures'
+
+// Normalize fixture-side expected values for binary agreement: POSSIBLE
+// counts as LIKELY (both = "show this trial"). Keeps the fixture data
+// informationally rich (3-class) while letting the binary model output
+// be evaluated correctly.
+function expectedBinary(expected) {
+ if (expected === 'POSSIBLE') return 'LIKELY'
+ return expected
+}
+
+// Class-name strings (one background + one text color each) keyed by verdict
+// or row status. PARSE_FAIL is the verdict run() records when classifying a
+// trial throws; PENDING is the status every row starts in.
+// NOTE(review): presumably consumed by the result badges — the usage is not
+// visible in this excerpt.
+const VERDICT_STYLES = {
+ LIKELY: 'bg-signal-good-bg text-signal-good',
+ POSSIBLE: 'bg-signal-warn-bg text-signal-warn',
+ UNLIKELY: 'bg-parchment-200 text-parchment-700',
+ PARSE_FAIL: 'bg-signal-bad-bg text-signal-bad',
+ PENDING: 'bg-parchment-100 text-parchment-700',
+}
+
+export default function ClassificationHarness() {
+ // Model key is resolved once, from the URL query string, by the lazy state
+ // initializer; no setter is kept, so it cannot change after the first render.
+ const [modelKey] = useState(() =>
+ resolveModelKey(typeof window !== 'undefined' ? window.location.search : '')
+ )
+ const model = NLP_MODELS[modelKey]
+ const { status, progress, error, load, webGPUSupported } = useNLP()
+ const { classifyOne, translateOne } = useClassifier()
+
+ // Editable inputs, seeded from the first preset, the default prompt and the
+ // bundled sample trials (pretty-printed JSON).
+ const [userDesc, setUserDesc] = useState(USER_PRESETS[0].text)
+ const [promptTemplate, setPromptTemplate] = useState(DEFAULT_CLASSIFY_PROMPT)
+ const [trialsJson, setTrialsJson] = useState(JSON.stringify(SAMPLE_TRIALS, null, 2))
+ // Concurrency was a UI dropdown until we serialized at the hook level
+ // (WebLLM engine is single-threaded). Kept as a constant so the worker
+ // loop still controls fan-out at the harness level — the real
+ // serialization happens in useClassifier's promise chain.
+ const concurrency = 3
+ // Maximum number of characters of each trial's eligibility text that run()
+ // slices into the prompt.
+ const [eligMax, setEligMax] = useState(1500)
+ // When translateFirst is on, run() translates userDesc to English once per
+ // batch and stores the result in translatedDesc (null until then).
+ const [translateFirst, setTranslateFirst] = useState(false)
+ const [translatedDesc, setTranslatedDesc] = useState(null)
+ // When true, outOfScope trials are left out of the agreement stats.
+ const [productionMode, setProductionMode] = useState(true)
+ const [results, setResults] = useState([])
+ const [running, setRunning] = useState(false)
+ // performance.now() timestamp taken when the most recent run started
+ // (0 = no run yet).
+ const [startT, setStartT] = useState(0)
+ // The tick value itself is never read; bumping it only forces a re-render.
+ const [, setTick] = useState(0)
+
+ // Lightweight ticker so elapsed time updates while a run is in flight.
+ useEffect(() => {
+ if (!running) return
+ const id = setInterval(() => setTick(t => t + 1), 250)
+ return () => clearInterval(id)
+ }, [running])
+
+ function getProgressLabel() {
+ if (!progress) return 'Loading model…'
+ return progress.text || `Loading model… ${Math.round((progress.progress ?? 0) * 100)}%`
+ }
+
+ async function run() {
+ let trials
+ try {
+ trials = JSON.parse(trialsJson)
+ if (!Array.isArray(trials)) throw new Error('Not an array')
+ } catch (e) {
+ alert('Trials JSON is invalid: ' + e.message)
+ return
+ }
+
+ setRunning(true)
+ setStartT(performance.now())
+ const initial = trials.map(trial => ({ trial, status: 'PENDING' }))
+ setResults(initial)
+ setTranslatedDesc(null)
+
+ // Translate user description to English once before classification, so the
+ // model anchors on a single language at inference time. Runs only once per
+ // batch — amortized cost across all N trials.
+ let effectiveUserDesc = userDesc
+ if (translateFirst) {
+ const translatePrompt = `Translate the following patient description into clear, clinical English. Preserve all medical and demographic facts (age, sex, condition, treatments, location). Do not add or remove information. Output ONLY the English translation, nothing else.
+
+Patient description: ${userDesc}
+
+English translation:`
+ try {
+ const { raw } = await translateOne(translatePrompt)
+ effectiveUserDesc = (raw || '').trim().replace(/^["']|["']$/g, '')
+ setTranslatedDesc(effectiveUserDesc)
+ } catch (e) {
+ alert('Translation failed: ' + (e?.message ?? 'unknown error'))
+ setRunning(false)
+ return
+ }
+ }
+
+ const queue = trials.map((trial, idx) => ({ idx, trial }))
+ const workersN = Math.min(concurrency, trials.length)
+
+ async function worker() {
+ while (queue.length) {
+ const { idx, trial } = queue.shift()
+ const elig = (trial.eligibility || '').slice(0, eligMax)
+ const prompt = promptTemplate
+ .replace('{{user}}', effectiveUserDesc)
+ .replace('{{title}}', trial.title || trial.briefTitle || '')
+ .replace('{{eligibility}}', elig)
+ try {
+ const { raw, latencyMs } = await classifyOne(prompt)
+ const parsed = parseVerdict(raw)
+ setResults(prev => {
+ const next = [...prev]
+ next[idx] = { trial, status: 'DONE', raw, latencyMs, ...parsed }
+ return next
+ })
+ } catch (err) {
+ setResults(prev => {
+ const next = [...prev]
+ next[idx] = {
+ trial,
+ status: 'DONE',
+ raw: '',
+ latencyMs: 0,
+ verdict: 'PARSE_FAIL',
+ reason: err?.message ?? 'classify error',
+ }
+ return next
+ })
+ }
+ }
+ }
+
+ await Promise.all(Array.from({ length: workersN }, worker))
+ setRunning(false)
+ }
+
+ function reset() {
+ setTrialsJson(JSON.stringify(SAMPLE_TRIALS, null, 2))
+ setResults([])
+ }
+
+ const [copyState, setCopyState] = useState('idle') // idle | copied | error
+ async function copyMarkdown() {
+ const md = buildMarkdownReport({
+ userDesc,
+ translatedDesc,
+ translateFirst,
+ productionMode,
+ hiddenCount,
+ promptTemplate,
+ eligMax,
+ modelLabel: model.label,
+ results,
+ stats: { done: done.length, total: results.length, elapsed, avgLat, maxLat, parseRate, parseFails, agreementPct, matches, withExpected: withExpected.length },
+ })
+ try {
+ await navigator.clipboard.writeText(md)
+ setCopyState('copied')
+ setTimeout(() => setCopyState('idle'), 1800)
+ } catch {
+ setCopyState('error')
+ setTimeout(() => setCopyState('idle'), 2400)
+ }
+ }
+
+ // ───────── stats ─────────
+ // Everything in this section is recomputed from `results` on every render;
+ // none of it is stored in state.
+ const done = results.filter(r => r.status === 'DONE')
+ // `!= null` drops only null/undefined, so the latencyMs: 0 that run()
+ // records for failed rows is included in the average.
+ const lats = done.map(r => r.latencyMs).filter(n => n != null)
+ const avgLat = lats.length ? Math.round(lats.reduce((a, b) => a + b, 0) / lats.length) : 0
+ const maxLat = lats.length ? Math.round(Math.max(...lats)) : 0
+ const parseFails = done.filter(r => r.verdict === 'PARSE_FAIL').length
+ // Percentage of completed rows whose verdict is not PARSE_FAIL.
+ const parseRate = done.length ? Math.round(((done.length - parseFails) / done.length) * 100) : 0
+ // Seconds since the last run started, formatted as a one-decimal string.
+ // NOTE(review): in the visible code startT is only set when a run starts,
+ // so this value keeps growing on any re-render after the run has finished —
+ // confirm whether an end timestamp should be captured.
+ const elapsed = startT ? ((performance.now() - startT) / 1000).toFixed(1) : '0.0'
+ // Production mode hides trials the CT.gov API would never return for the
+ // user's stated condition (e.g., melanoma trials in a breast-cancer search).
+ // The headline agreement % then reflects what users would actually see,
+ // not the model's behavior on stress-test inputs.
+ const inScope = (r) => !productionMode || !r.trial.outOfScope
+ const withExpected = done.filter(r => r.trial.expected && inScope(r))
+ // A row matches when the model verdict equals the fixture's expected label
+ // after expectedBinary() has folded POSSIBLE into LIKELY.
+ const matches = withExpected.filter(r => r.verdict === expectedBinary(r.trial.expected)).length
+ // null (not 0) when no completed, in-scope row carries an expected label.
+ const agreementPct = withExpected.length ? Math.round((matches / withExpected.length) * 100) : null
+ // Counts completed outOfScope rows regardless of the productionMode toggle.
+ const hiddenCount = done.filter(r => r.trial.outOfScope).length
+
+ // A run may start only once useNLP reports 'ready' and no run is in flight.
+ const canRun = status === 'ready' && !running
+
+ return (
+
+
+ Classification harness
+
+
+ Validate the proposed Stage-1 classifier (LIKELY / POSSIBLE / UNLIKELY) against real
+ ClinicalTrials.gov payloads using the on-device {model.label}. Pass criteria from the
+ Handoff: parse rate ≥ 90%, avg latency < 1.5s, agreement ≥ 80%.
+
+ )
+}
diff --git a/src/components/NaturalLanguageInput.test.jsx b/src/components/NaturalLanguageInput.test.jsx
index ca59891..a9fd691 100644
--- a/src/components/NaturalLanguageInput.test.jsx
+++ b/src/components/NaturalLanguageInput.test.jsx
@@ -158,3 +158,41 @@ describe('NaturalLanguageInput — error state', () => {
expect(screen.getByText(/try again/i)).toBeInTheDocument()
})
})
+
+describe('NaturalLanguageInput — queued submit during download', () => {
+ // Locks in the typing-while-loading flow: a user can hit Find trials while
+ // the model is still downloading; the intent is held until status flips to
+ // 'ready' and then auto-fires. Indirect smoke test for the StrictMode
+ // listener fix in useNLP — if the listener didn't re-attach after the dev
+ // double-invoke, the real-world status would never reach 'ready' and the
+ // drain effect (deps [status, pendingSubmit]) would never fire.
+ it('queues submit while downloading, fires extract once status flips to ready', async () => {
+ const extract = vi.fn().mockResolvedValue({
+ condition: 'breast cancer', location: null, age: 58, sex: 'FEMALE',
+ status: 'RECRUITING', phases: [],
+ })
+ useNLP.mockReturnValue({ ...baseHook, status: 'downloading', extract })
+ localStorage.setItem('iris_nlp_enabled', 'true')
+
+ const onExtract = vi.fn()
+ const { rerender } = render()
+ fireEvent.click(screen.getByRole('button', { name: /describe in your own words/i }))
+
+ fireEvent.change(screen.getByRole('textbox', { name: /natural language search/i }), {
+ target: { value: '58 with breast cancer' },
+ })
+
+ // Submit while downloading — should queue, NOT fire extract yet.
+ fireEvent.click(screen.getByRole('button', { name: /Run when ready/i }))
+ expect(extract).not.toHaveBeenCalled()
+ expect(screen.getByRole('button', { name: /Queued/i })).toBeInTheDocument()
+
+ // Worker reports ready. In production this comes via the listener that
+ // the StrictMode fix ensures stays attached after the cleanup-remount.
+ useNLP.mockReturnValue({ ...baseHook, status: 'ready', extract })
+ rerender()
+
+ await waitFor(() => expect(extract).toHaveBeenCalledWith('58 with breast cancer'))
+ await waitFor(() => expect(onExtract).toHaveBeenCalled())
+ })
+})
diff --git a/src/components/ResultCard.jsx b/src/components/ResultCard.jsx
index a520860..e9cc060 100644
--- a/src/components/ResultCard.jsx
+++ b/src/components/ResultCard.jsx
@@ -36,6 +36,36 @@ function SectionLabel({ children, pane }) {
)
}
+// Two-stage on-device pipeline status. Renders only in pane (detail) view
+// because the row already has a fit dot indicator. Tells the user explicitly
+// which stage is in flight so the empty content area below doesn't read as
+// "broken". Only 'classifying' renders a caption; other stages return null.
+function PipelineCaption({ stage, progress }) {
+ if (stage === 'classifying') {
+ return (
+
+
+
+ evaluating fit
+ {progress && progress.total > 0 && ` · ${progress.done} of ${progress.total}`}
+ · plain-language summary will follow
+
+
+ )
+ }
+ return null
+}
+
function MetaLine({ trial, nearest, pane }) {
const sep = (
@@ -80,6 +110,8 @@ export default function ResultCard({
inputLanguage = 'en',
simplificationSupported = true,
pane = false,
+ pipelineStage = null, // 'classifying' | 'awaiting-summary' | null
+ classifyProgress = null, // { done, total }
}) {
const nearest = nearestLocation(trial.locations, coords)
const wrapperClass = pane
@@ -87,11 +119,12 @@ export default function ResultCard({
: 'bg-white border border-parchment-400 rounded-lg p-5 mb-3 max-w-3xl'
const sumState = simplification?.summarize
- const fitState = simplification?.fit
-
+ // fitState/showFit removed when the "Why this might or might not fit you"
+ // section was dropped — Gemma 2B's accuracy on the fit narrative wasn't
+ // reliable enough to ship. Re-introduce both if the fit section comes
+ // back behind a fine-tuned model.
const showPlainLanguage = sumState && sumState.status !== 'error'
const showFallbackHint = sumState?.status === 'error'
- const showFit = fitState && fitState.status !== 'error' && fitState.text
return (
@@ -113,6 +146,10 @@ export default function ResultCard({
+ {pane && pipelineStage && (
+
+ )}
+
{showPlainLanguage && (
@@ -135,16 +172,11 @@ export default function ResultCard({
)}
- {showFit && (
-
- Why this might or might not fit you
-
- {fitState.text}
-
-
- )}
+ {/* "Why this might or might not fit you" intentionally omitted —
+ Gemma 2B's accuracy on the fit narrative isn't reliable
+ enough to ship. The TriageRow fit dot (driven by the
+ classifier) is the safer signal. The DoctorDisclaimer
+ below renders unconditionally to set expectations. */}
{(sumState.status === 'queued' || sumState.status === 'streaming') && (
@@ -224,8 +256,26 @@ export default function ResultCard({
)}
+ {pane && (
+
+
+ Check with your doctor when exploring treatment options
+ {' '}— this AI summary uses plain language to explain the treatment but can miss
+ eligibility details.
+
+ why?
+
+
+
+ The plain-language summary above was generated on your device by a small AI model. It
+ can miss or misstate who qualifies for a trial. Your care team has your full medical
+ picture and can confirm whether this one actually fits.
+
+
+ )}
+
{pane ? (
-
+
contact
{trial.contact.phone && (
{trial.contact.phone}
diff --git a/src/components/ResultCard.test.jsx b/src/components/ResultCard.test.jsx
index fdc91e3..99326c5 100644
--- a/src/components/ResultCard.test.jsx
+++ b/src/components/ResultCard.test.jsx
@@ -140,22 +140,19 @@ describe('ResultCard — Phase 3 simplification', () => {
expect(screen.getByText(/Plain-language version unavailable/i)).toBeInTheDocument()
})
- it('renders the fit paragraph when fit state is complete', () => {
+ // The "Why this might or might not fit you" section was removed because
+ // Gemma 2B's accuracy on the fit narrative wasn't reliable enough to
+ // ship — it occasionally flipped disease stage or treatment history.
+ // The TriageRow fit dot (driven by the binary classifier in
+ // useClassifier) is the safer signal. The simplifier still computes
+ // assess_fit when called; ResultCard just no longer renders it.
+ it('does not render the fit paragraph even when fit state is complete', () => {
const simplification = {
summarize: { status: 'complete', summary: 'Sum.', eligibility: 'Elig.', error: null },
fit: { status: 'complete', text: 'This may fit you because…', error: null },
}
render()
- expect(screen.getByText(/Why this might or might not fit you/i)).toBeInTheDocument()
- expect(screen.getByText('This may fit you because…')).toBeInTheDocument()
- })
-
- it('does not render fit section when fit is in error', () => {
- const simplification = {
- summarize: { status: 'complete', summary: 'Sum.', eligibility: 'Elig.', error: null },
- fit: { status: 'error', text: '', error: 'failed' },
- }
- render()
expect(screen.queryByText(/Why this might or might not fit you/i)).not.toBeInTheDocument()
+ expect(screen.queryByText('This may fit you because…')).not.toBeInTheDocument()
})
})
diff --git a/src/components/ResultsList.jsx b/src/components/ResultsList.jsx
index e0baa05..12813dd 100644
--- a/src/components/ResultsList.jsx
+++ b/src/components/ResultsList.jsx
@@ -1,7 +1,12 @@
-import { useEffect, useMemo, useState } from 'react'
+import { useEffect, useMemo, useRef, useState } from 'react'
import { useGeocode } from '../hooks/useGeocode'
import { useClinicalTrials } from '../hooks/useClinicalTrials'
import { useSimplifier } from '../hooks/useSimplifier'
+import { useNLP } from '../hooks/useNLP'
+import { useClassifier } from '../hooks/useClassifier'
+import { useIsMobile } from '../hooks/useIsMobile'
+import { NLP_MODELS } from '../utils/nlpModels'
+import { buildClassifyPrompt, parseVerdict } from '../utils/classifyTrial'
import ResultCard from './ResultCard'
import TriageRow from './TriageRow'
import MobileSheet from './MobileSheet'
@@ -11,22 +16,31 @@ import {
SUPPORTED_SIMPLIFICATION_LANGUAGES,
} from '../utils/detectInputLanguage'
+const NLP_CONSENT_KEY = 'iris_nlp_enabled'
+
+// Stage-1 classification is wired end-to-end (worker, hook, harness) but
+// not yet surfaced in the in-app results UI. Reason: without sort wiring
+// the fit dots don't drive any user-visible behavior — they're just
+// decoration. The harness at ?test=classify still uses the full pipeline
+// for prompt iteration and validation. Flip this to true once "Best fit"
+// sort is wired so the dots become actionable.
+const ENABLE_CLASSIFY_IN_RESULTS = false
+
+// Build a synthetic patient description from extracted fields when the user
+// came in via structured form but had previously used NL (so consent exists).
+function patientDescFromFields(fields) {
+ if (!fields) return null
+ const parts = []
+ if (fields.age != null) parts.push(`${fields.age}-year-old`)
+ if (fields.sex && fields.sex !== 'ALL') parts.push(fields.sex.toLowerCase())
+ if (fields.condition) parts.push(`with ${fields.condition}`)
+ if (fields.location) parts.push(`in ${fields.location}`)
+ return parts.length > 0 ? parts.join(' ') : null
+}
+
const EAGER_BATCH_SIZE = 5
-const MOBILE_BREAKPOINT_PX = 820
const LIST_WIDTH_PX = 400
-function useIsMobile() {
- const [isMobile, setIsMobile] = useState(() =>
- typeof window !== 'undefined' && window.innerWidth <= MOBILE_BREAKPOINT_PX
- )
- useEffect(() => {
- const onResize = () => setIsMobile(window.innerWidth <= MOBILE_BREAKPOINT_PX)
- window.addEventListener('resize', onResize)
- return () => window.removeEventListener('resize', onResize)
- }, [])
- return isMobile
-}
-
export default function ResultsList({ searchParams, modelKey, userDescription, extractedFields }) {
// Phase 3 simplification only ships for English and Spanish — those are
// the languages we've verified the local model produces accurately.
@@ -58,13 +72,73 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
extractedFields,
})
- const allTrials = data?.pages.flatMap(p => p.trials) ?? []
+ // Memoized so effect dep arrays comparing against allTrials don't churn
+ // every render — react-query returns the same `data` ref while data is
+ // unchanged, so memo identity is stable across non-data renders.
+ const allTrials = useMemo(
+ () => data?.pages.flatMap(p => p.trials) ?? [],
+ [data]
+ )
const isMobile = useIsMobile()
const [selectedNctId, setSelectedNctId] = useState(null)
const [sheetOpen, setSheetOpen] = useState(false)
const [compareSet, setCompareSet] = useState(() => new Set())
+ // ─── Stage-1 classification ───────────────────────────────────────
+ // Only fires when ENABLE_CLASSIFY_IN_RESULTS is on and the user previously
+ // consented to the on-device model (iris_nlp_enabled localStorage key, set
+ // during NL flow). Structured-form-only sessions skip classification
+ // entirely — no auto-load, no covert worker initialization. Verdicts surface as
+ // fit dots in TriageRow + an "evaluating fit · X of N" caption in the toolbar.
+ const nlp = useNLP()
+ const { classifyOne } = useClassifier()
+ const [classifications, setClassifications] = useState(new Map())
+ const [classifyProgress, setClassifyProgress] = useState({ done: 0, total: 0 })
+ const classifiedRef = useRef(new Set())
+ const cancelClassifyRef = useRef(null)
+
+ const consented = useMemo(() => {
+ try { return localStorage.getItem(NLP_CONSENT_KEY) === 'true' } catch { return false }
+ }, [])
+ const patientDesc = useMemo(
+ () => userDescription || patientDescFromFields(extractedFields),
+ [userDescription, extractedFields]
+ )
+ const canClassify = ENABLE_CLASSIFY_IN_RESULTS && consented && nlp.webGPUSupported && Boolean(patientDesc)
+
+ // Idempotent: worker fast-returns 'ready' if engine already loaded
+ // (e.g. NL extraction loaded it earlier this session). Destructure
+ // load() out of nlp so we can list it in deps directly — `nlp` itself
+ // is a fresh object on every render (useNLP doesn't memoize its
+ // return), and listing the whole hook would re-fire the effect on
+ // every render even when nothing relevant changed.
+ const nlpLoad = nlp.load
+ useEffect(() => {
+ if (!canClassify) return
+ if (nlp.status !== 'idle') return
+ const model = NLP_MODELS[modelKey] ?? NLP_MODELS.gemma
+ nlpLoad(model.id, { isThinking: model.isThinking, chatOpts: model.chatOpts })
+ }, [canClassify, nlp.status, modelKey, nlpLoad])
+
+ // Reset classification state when EITHER the search params OR the patient
+ // description changes. Including patientDesc handles the case where a user
+ // hits "Find trials" again with a refined prompt that happens to extract
+ // to the same condition: the API result set may be cached (same trials)
+ // but the verdicts are now stale w.r.t. the new patient description, so
+ // classifications + classifiedRef must be wiped and the in-flight batch
+ // cancelled so the next pass re-classifies against the new patient.
+ // Also calls simplifier.cancelPending() so outstanding summary work stops
+ // competing with the re-classification pass.
+ useEffect(() => {
+ classifiedRef.current = new Set()
+ setClassifications(new Map())
+ setClassifyProgress({ done: 0, total: 0 })
+ if (cancelClassifyRef.current) cancelClassifyRef.current()
+ simplifier.cancelPending()
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [searchParams, patientDesc])
+
function toggleCompare(nctId) {
setCompareSet(prev => {
const next = new Set(prev)
@@ -93,22 +167,82 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
if (isMobile) setSheetOpen(true)
}
- // Fire when the result set changes — keyed on the first 5 NCT IDs.
- // Using searchParams as the key would fire too early (before data arrives);
- // using allTrials would re-fire on every pagination append.
+ // Classify newly-arrived trials. Pagination appends → classify only new
+ // NCTs. Engine-not-loaded check is via nlp.status !== 'ready'.
+ const trialKeyAll = allTrials.map(t => t.nctId).join(',')
+ useEffect(() => {
+ if (!canClassify || nlp.status !== 'ready' || !patientDesc) return
+ const newTrials = allTrials.filter(t => !classifiedRef.current.has(t.nctId))
+ if (newTrials.length === 0) return
+ for (const t of newTrials) classifiedRef.current.add(t.nctId)
+
+ setClassifyProgress(prev => ({ done: prev.done, total: prev.total + newTrials.length }))
+
+ let cancelled = false
+ cancelClassifyRef.current = () => { cancelled = true }
+ ;(async () => {
+ for (const trial of newTrials) {
+ if (cancelled) return
+ try {
+ const prompt = buildClassifyPrompt(patientDesc, trial)
+ const { raw } = await classifyOne(prompt)
+ const parsed = parseVerdict(raw)
+ if (cancelled) return
+ setClassifications(prev => {
+ const next = new Map(prev)
+ next.set(trial.nctId, { status: 'done', ...parsed, raw })
+ return next
+ })
+ } catch (err) {
+ if (cancelled) return
+ setClassifications(prev => {
+ const next = new Map(prev)
+ next.set(trial.nctId, { status: 'done', verdict: 'PARSE_FAIL', reason: err?.message ?? 'classify error' })
+ return next
+ })
+ } finally {
+ if (!cancelled) {
+ setClassifyProgress(prev => ({ ...prev, done: prev.done + 1 }))
+ }
+ }
+ }
+ })()
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [canClassify, nlp.status, patientDesc, trialKeyAll])
+
+ // Reset the simplifier when the first EAGER_BATCH_SIZE NCT IDs change (new search,
+ // not pagination appends). Per-trial enqueue happens below in the selected-trial effect.
const eagerKey = allTrials.slice(0, EAGER_BATCH_SIZE).map(t => t.nctId).join(',')
useEffect(() => {
simplifier.cancelPending()
simplifier.resetCache()
- if (allTrials.length === 0) return
+ // eslint-disable-next-line react-hooks/exhaustive-deps
+ }, [eagerKey])
+
+ // Per Handoff Phase 3 step 6: stage-2 simplification only fires for the
+ // currently-selected trial. Critically, it WAITS for stage-1
+ // classification to finish first — otherwise both compete for the
+ // single-threaded worker, the classifier appears to stall, and the
+ // simplifier (running first) produces noisier output under contention.
+ // For structured-form-only sessions canClassify is false and
+ // classifyProgress.total stays 0, so the gate falls through to "true"
+ // and simplification runs immediately on selection.
+ const classifyDone = !canClassify || (
+ classifyProgress.total > 0 && classifyProgress.done >= classifyProgress.total
+ )
+ useEffect(() => {
if (!simplificationSupported) return
- const eager = allTrials.slice(0, EAGER_BATCH_SIZE)
- for (const t of eager) simplifier.enqueueSummarize(t, { outputLanguage })
- if (extractedFields) {
- for (const t of eager) simplifier.enqueueAssessFit(t, { outputLanguage })
- }
+ if (!selected) return
+ if (!classifyDone) return
+ simplifier.enqueueSummarize(selected, { outputLanguage })
+ // assess_fit ("Why this might or might not fit you") intentionally not
+ // enqueued — Gemma 2B's accuracy on the fit narrative isn't reliable
+ // enough to ship (it occasionally flips disease stage / treatment
+ // history). The classifier's binary verdict + dot is the safer signal.
+ // The assess_fit pipeline itself stays in useSimplifier in case we
+ // re-enable it on a fine-tuned model later.
// eslint-disable-next-line react-hooks/exhaustive-deps
- }, [eagerKey, simplificationSupported, outputLanguage])
+ }, [selected?.nctId, simplificationSupported, outputLanguage, classifyDone])
if (isLoading) {
return (
@@ -156,19 +290,37 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
function handleRequestSimplify(trial) {
if (!simplificationSupported) return
simplifier.enqueueSummarize(trial, { outputLanguage })
- if (extractedFields) simplifier.enqueueAssessFit(trial, { outputLanguage })
+ // assess_fit deliberately omitted — see selected-trial effect above.
}
function renderDetail(trial) {
+ // Tell ResultCard which pipeline stage is in flight so it can render
+ // an explicit progress caption above the empty content area instead of
+ // showing the trial's raw summary (which can look like the model
+ // already replied with the wrong text).
+ let pipelineStage = null
+ const sim = simplifier.states.get(trial.nctId)
+ const simStatus = sim?.summarize?.status
+ if (canClassify && !classifyDone) {
+ pipelineStage = 'classifying'
+ } else if (
+ simplificationSupported &&
+ (!simStatus || simStatus === 'queued') &&
+ classifyDone
+ ) {
+ pipelineStage = 'awaiting-summary'
+ }
return (
)
}
@@ -184,6 +336,7 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
= 3}
+ classification={canClassify ? classifications.get(trial.nctId) : null}
+ classifyPending={canClassify && !classifications.has(trial.nctId)}
/>
))}
@@ -262,16 +417,14 @@ const PHASE_LABELS = {
PHASE4: 'Phase 4',
}
-const SORT_OPTIONS = [
- { id: 'fit', label: 'Best fit', disabled: true, title: 'Available once on-device classification runs' },
- { id: 'distance', label: 'Distance', disabled: true, title: 'Sort wiring coming in a follow-up' },
- { id: 'phase', label: 'Phase', disabled: true, title: 'Sort wiring coming in a follow-up' },
- { id: 'recent', label: 'Most recent', disabled: true, title: 'Sort wiring coming in a follow-up' },
-]
-
-function ResultsToolbar({ totalCount, searchParams }) {
- const [sort, setSort] = useState('recent')
+// Sort UI removed — the chips were visible-but-disabled placeholders for
+// "Best fit" / "Distance" / "Phase" / "Most recent" which read as broken
+// to users. When sort wiring lands (CT.gov API supports `sort=` for
+// distance and last-update; "Best fit" needs the classifier verdicts
+// per-trial), restore from git history at 67d5fc8 and wire onClick →
+// re-fetch through useClinicalTrials with the new sort token.
+function ResultsToolbar({ totalCount, searchParams, classifyProgress }) {
const summaryParts = [`${totalCount.toLocaleString()} trial${totalCount !== 1 ? 's' : ''}`]
if (searchParams.location) summaryParts.push(`near ${searchParams.location}`)
if (searchParams.location && searchParams.radius) summaryParts.push(`within ${searchParams.radius} mi`)
@@ -282,7 +435,7 @@ function ResultsToolbar({ totalCount, searchParams }) {
}
return (
-