diff --git a/docs/design-references-shared/README.md b/docs/design-references-shared/README.md
new file mode 100644
index 0000000..489ae34
--- /dev/null
+++ b/docs/design-references-shared/README.md
@@ -0,0 +1,17 @@
+# iris-shared.jsx (moved from shared/iris-shared.jsx) — design reference, not source
+
+Reference implementations from the original Claude.ai design exploration.
+Components in `iris-shared.jsx` (`IrisHeader`, `IrisSearchBar`, `LocalAIBadge`,
+`FitMeter`, `StatusPill`, `ActionRow`, `StreamingText`, …) were ported into
+the live React app under `src/components/` and `src/utils/` — the versions
+here are kept verbatim so a future reader can compare implementations
+against the original prototype.
+
+**Do not import from this file in `src/`.** It runs against a Babel-standalone
+environment in `IRIS Triage.html` and uses inline-style patterns the live
+app intentionally moved away from (the live app uses Tailwind utility
+classes on top of CSS custom properties from `styles/tokens.css`).
+
+If you're trying to "fix" or "consolidate" this file: stop. Edit the live
+component under `src/components/` instead. The existence of this file is
+documentation, not duplication.
diff --git a/shared/iris-shared.jsx b/docs/design-references-shared/iris-shared.jsx
similarity index 100%
rename from shared/iris-shared.jsx
rename to docs/design-references-shared/iris-shared.jsx
diff --git a/src/App.jsx b/src/App.jsx
index 83666b7..b4ef669 100644
--- a/src/App.jsx
+++ b/src/App.jsx
@@ -18,12 +18,15 @@ const NLPTestPanel = import.meta.env.DEV
 const ProdScenarioTestPanel = import.meta.env.DEV
   ? lazy(() => import('./components/ProdScenarioTestPanel'))
   : null
+const ClassificationHarness = import.meta.env.DEV
+  ? lazy(() => import('./components/ClassificationHarness'))
+  : null
 
 function getTestRoute() {
   if (typeof window === 'undefined') return null
   if (!import.meta.env.DEV) return null
   const t = new URLSearchParams(window.location.search).get('test')
-  return t === 'nlp' || t === 'scenarios' ? t : null
+  return t === 'nlp' || t === 'scenarios' || t === 'classify' ? t : null
 }
 
 function IrisApp() {
@@ -45,6 +48,15 @@ function IrisApp() {
       </div>
     )
   }
+  if (testRoute === 'classify' && ClassificationHarness) {
+    return (
+      <div className="min-h-screen bg-parchment-50">
+        <Suspense fallback={<div className="p-6 text-sm">Loading classification harness…</div>}>
+          <ClassificationHarness />
+        </Suspense>
+      </div>
+    )
+  }
   if (testRoute === 'scenarios' && ProdScenarioTestPanel) {
     // ProdScenarioTestPanel calls fetch directly, so it doesn't need a query
     // client. IrisApp is already inside the App() QueryClientProvider, so no
diff --git a/src/components/ClassificationHarness.fixtures.js b/src/components/ClassificationHarness.fixtures.js
new file mode 100644
index 0000000..c33b3bc
--- /dev/null
+++ b/src/components/ClassificationHarness.fixtures.js
@@ -0,0 +1,332 @@
+// Fixture data for the dev-only Classification Harness (?test=classify).
+// Lives next to the component but split out because the trial array is
+// 300+ lines and made the harness file hard to navigate when iterating
+// on prompts vs data.
+//
+// `outOfScope: true` flags trials the CT.gov API would NOT return for
+// a breast-cancer search — kept in the fixture as wrong-condition
+// stress tests, but the harness's "production-realistic agreement"
+// toggle excludes them from the headline metric.
+
+export const SAMPLE_TRIALS = [
+  {
+    nctId: 'NCT05952557',
+    title: 'Phase IIIb Study of Ribociclib + Endocrine Therapy in Early Breast Cancer',
+    eligibility: 'Inclusion: Adult female, ≥18 years. HR-positive, HER2-negative early breast cancer. Completed definitive surgery. Postmenopausal status confirmed. ECOG 0-1. Adequate organ function. Exclusion: Prior CDK4/6 inhibitor. Pregnancy or breastfeeding. Active second malignancy.',
+    expected: 'LIKELY',
+  },
+  {
+    nctId: 'NCT06104020',
+    title: 'Sacituzumab Govitecan in Metastatic Triple-Negative Breast Cancer',
+    eligibility: 'Inclusion: Adult, any sex. Histologically confirmed metastatic triple-negative breast cancer (ER<1%, PR<1%, HER2-negative). At least one prior line of systemic therapy in metastatic setting. ECOG 0-2. Measurable disease per RECIST 1.1. Exclusion: Active CNS metastases. Prior topoisomerase I inhibitor.',
+    expected: 'POSSIBLE',
+  },
+  {
+    nctId: 'NCT05887492',
+    title: 'Adaptive Radiation Boost in Locally Advanced HER2+ Breast Cancer',
+    eligibility: 'Inclusion: Adult female. HER2-positive breast cancer confirmed by IHC 3+ or FISH-positive. Stage II-III disease. Completed neoadjuvant chemotherapy. ECOG 0-1. Exclusion: Prior radiation to chest. Pregnancy.',
+    expected: 'POSSIBLE',
+  },
+  {
+    nctId: 'NCT06221340',
+    title: 'Aerobic Exercise During Adjuvant Chemo for Breast Cancer Survivors',
+    eligibility: 'Inclusion: Adult, any sex. Breast cancer, any stage. Currently receiving or scheduled for adjuvant chemotherapy. Cleared by oncologist for moderate exercise. Exclusion: Cardiac contraindications.',
+    expected: 'LIKELY',
+  },
+  {
+    nctId: 'NCT04123456',
+    title: 'Pembrolizumab in Advanced Non-Small Cell Lung Cancer',
+    eligibility: 'Inclusion: Adult. Histologically confirmed advanced NSCLC. PD-L1 expression ≥50%. ECOG 0-1. Exclusion: Active autoimmune disease. Prior immunotherapy.',
+    expected: 'UNLIKELY',
+    outOfScope: true, // NSCLC — wouldn't appear in a breast-cancer API search
+  },
+  {
+    nctId: 'NCT05123987',
+    title: 'Targeted Therapy in Pediatric Acute Lymphoblastic Leukemia',
+    eligibility: 'Inclusion: Pediatric patients aged 2-17 years. Newly diagnosed ALL. Exclusion: Adults. Prior chemotherapy.',
+    expected: 'UNLIKELY',
+    outOfScope: true, // Pediatric ALL — wouldn't appear in a breast-cancer API search
+  },
+
+  // ─── Subtype-gated breast cancer trials — POSSIBLE without confirmed subtype ───
+  {
+    nctId: 'NCT05300100',
+    title: 'Tucatinib + Trastuzumab in HER2-Positive Metastatic Breast Cancer',
+    eligibility: 'Inclusion: Adult, any sex, ≥18 years. Histologically confirmed HER2-positive metastatic breast cancer (IHC 3+ or FISH-amplified). At least 2 prior HER2-directed therapies. ECOG 0-1. Exclusion: Untreated brain metastases. Prior tucatinib.',
+    expected: 'POSSIBLE',
+  },
+  {
+    nctId: 'NCT05400201',
+    title: 'Olaparib Maintenance in BRCA-Mutated HER2-Negative Breast Cancer',
+    eligibility: 'Inclusion: Adult female. HER2-negative breast cancer with germline BRCA1 or BRCA2 mutation (confirmed by central testing). High-risk early disease following adjuvant chemotherapy. Postmenopausal or premenopausal with ovarian suppression. Exclusion: Prior PARP inhibitor.',
+    expected: 'POSSIBLE',
+  },
+  {
+    nctId: 'NCT05511223',
+    title: 'CDK4/6 Inhibitor Switch in Hormone-Receptor-Positive Advanced Breast Cancer',
+    eligibility: 'Inclusion: Adult women, postmenopausal. HR-positive, HER2-negative advanced or metastatic breast cancer. Disease progression on a prior CDK4/6 inhibitor. ECOG 0-2.',
+    expected: 'POSSIBLE',
+  },
+
+  // ─── Strong matches for a 58yo with breast cancer ───
+  {
+    nctId: 'NCT05633445',
+    title: 'Cognitive Behavioral Therapy for Cancer-Related Fatigue',
+    eligibility: 'Inclusion: Adults ≥18 years with any solid tumor diagnosis (breast, colon, lung, prostate, etc.). Currently in active treatment or within 5 years of treatment completion. Self-reported fatigue ≥4 on a 0-10 scale. Exclusion: Severe untreated depression. Inability to attend weekly sessions.',
+    expected: 'LIKELY',
+  },
+  {
+    nctId: 'NCT05755677',
+    title: 'Lymphedema Surveillance Program After Breast Cancer Surgery',
+    eligibility: 'Inclusion: Adult female ≥18 years. History of breast cancer treated with axillary surgery (sentinel lymph node biopsy or axillary dissection). Within 3 years of surgery. Exclusion: Pre-existing lymphedema. Current breast cancer recurrence.',
+    expected: 'LIKELY',
+  },
+  {
+    nctId: 'NCT05822334',
+    title: 'Mindfulness-Based Stress Reduction for Breast Cancer Survivors',
+    eligibility: 'Inclusion: Adult women ≥21 years. Diagnosed with breast cancer (any stage). Completed primary treatment within the past 5 years OR currently on adjuvant endocrine therapy. Exclusion: Active psychosis. Prior MBSR participation.',
+    expected: 'LIKELY',
+  },
+  {
+    nctId: 'NCT05901128',
+    title: 'Vaginal Estrogen Safety Study in Postmenopausal Breast Cancer Survivors',
+    eligibility: 'Inclusion: Postmenopausal women ages 45-75 with a history of HR-positive or HR-negative breast cancer. Disease-free for ≥1 year. Genitourinary symptoms of menopause. Stable on aromatase inhibitor or tamoxifen, or treatment-free. Exclusion: Current metastatic disease.',
+    expected: 'LIKELY',
+  },
+
+  // ─── Wrong condition / wrong demographic — clear UNLIKELY ───
+  {
+    nctId: 'NCT04567890',
+    title: 'Pembrolizumab in Advanced Melanoma',
+    eligibility: 'Inclusion: Adults with histologically confirmed unresectable Stage III or Stage IV melanoma. ECOG 0-1. No prior systemic therapy for advanced disease. Exclusion: Active autoimmune disease.',
+    expected: 'UNLIKELY',
+    outOfScope: true,
+  },
+  {
+    nctId: 'NCT04678901',
+    title: 'Apixaban vs. Warfarin in Atrial Fibrillation',
+    eligibility: 'Inclusion: Adults ≥18 years with non-valvular atrial fibrillation. CHA2DS2-VASc score ≥2. Exclusion: Mechanical heart valve. Active bleeding.',
+    expected: 'UNLIKELY',
+    outOfScope: true,
+  },
+  {
+    nctId: 'NCT04789012',
+    title: 'GLP-1 Agonist for Weight Management in Type 2 Diabetes',
+    eligibility: 'Inclusion: Adults 18-75 with Type 2 diabetes mellitus. BMI ≥30. HbA1c 7.0-10.0%. Exclusion: Type 1 diabetes. Active malignancy within 5 years. History of pancreatitis.',
+    expected: 'UNLIKELY',
+    outOfScope: true,
+  },
+  {
+    nctId: 'NCT04890123',
+    title: 'Robotic Prostatectomy Outcomes in Localized Prostate Cancer',
+    eligibility: 'Inclusion: Men ≥40 years with biopsy-confirmed clinically localized prostate cancer (T1-T2). Candidate for radical prostatectomy. Exclusion: Prior pelvic surgery or radiation.',
+    expected: 'UNLIKELY',
+    outOfScope: true,
+  },
+  {
+    nctId: 'NCT04901234',
+    title: 'Pediatric Vaccine Immunogenicity Study',
+    eligibility: 'Inclusion: Healthy children aged 6 months to 5 years. Up to date on routine immunizations. Exclusion: Immunocompromised. Recent illness within 14 days.',
+    expected: 'UNLIKELY',
+    outOfScope: true,
+  },
+
+  // ─── Edge cases — should challenge the model ───
+  {
+    nctId: 'NCT05012345',
+    title: 'Palliative Care Integration in Patients with Advanced Solid Tumors',
+    eligibility: 'Inclusion: Adults ≥18 years with advanced (Stage IV) solid tumor of any primary site (breast, lung, GI, GU, GYN). Estimated prognosis 6-24 months. ECOG 0-3. Exclusion: Currently enrolled in hospice.',
+    expected: 'POSSIBLE',
+  },
+  {
+    nctId: 'NCT05123450',
+    title: 'Premenopausal Breast Cancer: Ovarian Function Suppression Trial',
+    eligibility: 'Inclusion: Premenopausal women ages 18-45 with newly diagnosed HR-positive early breast cancer. Confirmed premenopausal by FSH and estradiol levels. Exclusion: Postmenopausal status. Prior ovarian suppression therapy.',
+    expected: 'UNLIKELY',
+  },
+
+  // ─── Realistic-length eligibility (~2-3.5kB each) — stress-tests how the
+  //     model handles formal CT.gov noise and how truncation affects accuracy.
+  //     Try these with eligMax = 800 vs 3000 vs 6000 to see the trade-off.
+  {
+    nctId: 'NCT-LONG-01',
+    title: 'Phase II Study of Sacituzumab Govitecan-hziy in Patients with HR-Positive, HER2-Negative Metastatic Breast Cancer After Endocrine Therapy and CDK4/6 Inhibitor',
+    eligibility: `Inclusion Criteria:
+
+1. Female participants ≥18 years of age at the time of signing informed consent.
+2. Histologically or cytologically confirmed adenocarcinoma of the breast that is metastatic or locally advanced and not amenable to curative resection or radiotherapy.
+3. Documentation of estrogen receptor (ER)-positive (≥1% staining by IHC) and/or progesterone receptor (PR)-positive (≥1% staining by IHC) tumor status, in accordance with ASCO/CAP guidelines.
+4. Documentation of HER2-negative status defined as IHC 0, IHC 1+, or IHC 2+ with negative in situ hybridization (ISH), per ASCO/CAP guidelines.
+5. Disease progression on or after at least one prior CDK4/6 inhibitor (palbociclib, ribociclib, or abemaciclib) administered for advanced or metastatic disease, in combination with an aromatase inhibitor or fulvestrant.
+6. Disease progression on or after at least one and no more than two prior endocrine therapies (e.g., aromatase inhibitor, fulvestrant, tamoxifen) for advanced or metastatic disease.
+7. No more than one prior chemotherapy regimen for metastatic disease.
+8. Postmenopausal status, OR premenopausal/perimenopausal women who agree to receive concurrent ovarian function suppression with a luteinizing hormone-releasing hormone (LHRH) agonist throughout study treatment.
+9. Measurable disease per RECIST v1.1, or non-measurable bone-only disease assessable per protocol-specified criteria.
+10. ECOG performance status 0 or 1.
+11. Adequate organ function:
+    - Absolute neutrophil count (ANC) ≥1.5 × 10^9/L
+    - Platelets ≥100 × 10^9/L
+    - Hemoglobin ≥9.0 g/dL (transfusion permitted)
+    - Total bilirubin ≤1.5 × ULN (≤3 × ULN for participants with documented Gilbert syndrome)
+    - AST and ALT ≤2.5 × ULN (≤5 × ULN if liver metastases present)
+    - Creatinine clearance ≥50 mL/min by Cockcroft-Gault equation
+    - INR and aPTT ≤1.5 × ULN unless on anticoagulants
+12. Resolution of all acute toxic effects of prior anti-cancer therapy or surgical procedures to NCI CTCAE v5.0 Grade ≤1 (except alopecia and Grade 2 neuropathy).
+13. Willingness to provide tumor tissue (archival or fresh biopsy) for biomarker analyses.
+
+Exclusion Criteria:
+
+1. Prior treatment with sacituzumab govitecan or any other Trop-2-directed therapy.
+2. Prior treatment with an antibody-drug conjugate containing a topoisomerase I inhibitor payload (e.g., trastuzumab deruxtecan).
+3. Active CNS metastases. Participants with previously treated, asymptomatic CNS metastases are eligible if clinically stable for ≥4 weeks off corticosteroids and anticonvulsants.
+4. Leptomeningeal disease.
+5. Known active infection requiring systemic therapy, including untreated HIV, active HBV (HBsAg positive or HBV DNA detectable), or active HCV (HCV RNA detectable).
+6. Significant cardiovascular disease, including: NYHA Class III or IV congestive heart failure, myocardial infarction or unstable angina within 6 months, uncontrolled arrhythmia, baseline QTcF >470 ms.
+7. History of another malignancy within 3 years, except for adequately treated non-melanoma skin cancer, in situ cervical or breast cancer, or low-risk localized prostate cancer on active surveillance.
+8. Known hypersensitivity to irinotecan or any component of the study drug formulation.
+9. Pregnant or breastfeeding women. Women of childbearing potential must agree to use highly effective contraception during the study and for 6 months after the last dose.
+10. Concurrent participation in another therapeutic clinical trial.
+11. Major surgery within 4 weeks prior to first dose.
+12. Live vaccines within 30 days prior to first dose.`,
+    expected: 'POSSIBLE',
+  },
+  {
+    nctId: 'NCT-LONG-02',
+    title: 'Randomized Phase III Trial of Adjuvant Endocrine Therapy ± Abemaciclib in Postmenopausal Women with HR-Positive, HER2-Negative, Node-Positive Early Breast Cancer at High Risk of Recurrence',
+    eligibility: `Inclusion Criteria:
+
+1. Female, postmenopausal at the time of randomization. Postmenopausal status defined as: (a) prior bilateral oophorectomy, (b) age ≥60 years, OR (c) age <60 with amenorrhea ≥12 months in the absence of chemotherapy, tamoxifen, or ovarian suppression AND FSH and estradiol in the postmenopausal range.
+2. Age 18 to 75 years inclusive at the time of consent.
+3. ECOG performance status of 0, 1, or 2.
+4. Histologically confirmed invasive breast carcinoma. Multicentric or multifocal disease is allowed if all foci meet eligibility.
+5. Hormone receptor-positive disease, defined as ≥1% of tumor cells staining positive for estrogen receptor and/or progesterone receptor by IHC, per ASCO/CAP guidelines.
+6. HER2-negative disease, defined as IHC 0, 1+, or 2+ with negative reflex ISH testing per ASCO/CAP guidelines.
+7. Stage II or III disease with high-risk pathologic features, defined as ≥1 of the following:
+    - ≥4 positive axillary lymph nodes, OR
+    - 1-3 positive axillary lymph nodes AND tumor size ≥5 cm, OR
+    - 1-3 positive axillary lymph nodes AND histologic grade 3, OR
+    - 1-3 positive axillary lymph nodes AND Ki-67 ≥20%
+8. Definitive surgical treatment of primary tumor with negative margins (lumpectomy with whole-breast irradiation OR mastectomy with or without post-mastectomy radiation per institutional standard).
+9. Completion of any neoadjuvant or adjuvant chemotherapy at least 21 days but no more than 16 months prior to randomization.
+10. Initiation of adjuvant endocrine therapy (aromatase inhibitor, with or without LHRH agonist) is permitted, but participants must not have received endocrine therapy for more than 12 weeks prior to randomization.
+11. Adequate organ function within 14 days of randomization:
+    - ANC ≥1.5 × 10^9/L
+    - Platelets ≥100 × 10^9/L
+    - Hemoglobin ≥10.0 g/dL
+    - Total bilirubin ≤1.5 × ULN
+    - AST/ALT ≤2.5 × ULN
+    - Creatinine clearance ≥50 mL/min
+12. Negative serum or urine pregnancy test for participants of childbearing potential.
+
+Exclusion Criteria:
+
+1. Stage IV (metastatic) breast cancer or evidence of distant metastases on staging imaging.
+2. Inflammatory breast cancer.
+3. Bilateral invasive breast cancer.
+4. Prior treatment with any CDK4/6 inhibitor in any setting.
+5. Prior anti-cancer therapy other than chemotherapy and locoregional therapy for the current breast cancer diagnosis.
+6. History of another malignancy within 5 years prior to randomization, except adequately treated non-melanoma skin cancer, in situ cervical cancer, or contralateral DCIS.
+7. Active or chronic hepatitis B or C infection, or known HIV infection.
+8. Significant uncontrolled cardiovascular disease: NYHA Class III/IV heart failure, myocardial infarction within 6 months, ventricular arrhythmia requiring treatment.
+9. History of interstitial lung disease or pneumonitis requiring corticosteroids.
+10. Major surgery (other than breast cancer surgery) within 28 days of randomization.
+11. Receiving strong CYP3A inhibitors or inducers within 14 days that cannot be discontinued.
+12. Inability to swallow oral medications or significant malabsorption.
+13. Pregnant or breastfeeding (premenopausal participants only — see inclusion criterion 1).`,
+    expected: 'LIKELY',
+  },
+  {
+    nctId: 'NCT-LONG-03',
+    title: 'Phase III Study of Pembrolizumab Plus Chemotherapy versus Chemotherapy Alone for First-Line Treatment of Metastatic Squamous Non-Small Cell Lung Cancer',
+    outOfScope: true,
+    eligibility: `Inclusion Criteria:
+
+1. Histologically or cytologically confirmed Stage IV squamous non-small cell lung cancer (NSCLC) per AJCC 8th edition.
+2. Male or female ≥18 years of age.
+3. No prior systemic therapy for metastatic NSCLC. Prior adjuvant or neoadjuvant chemotherapy is allowed if completed ≥6 months prior to enrollment.
+4. Measurable disease per RECIST v1.1.
+5. Provision of a tumor tissue sample (archival or fresh biopsy) adequate for PD-L1 IHC testing using the 22C3 pharmDx assay.
+6. ECOG performance status 0 or 1.
+7. Life expectancy ≥3 months.
+8. Adequate organ function within 10 days of randomization:
+    - ANC ≥1.5 × 10^9/L without G-CSF support
+    - Platelets ≥100 × 10^9/L without transfusion
+    - Hemoglobin ≥9.0 g/dL
+    - Total bilirubin ≤1.5 × ULN
+    - AST/ALT ≤2.5 × ULN (≤5 × ULN if liver involvement)
+    - Creatinine clearance ≥45 mL/min
+    - INR/aPTT ≤1.5 × ULN
+9. Female participants of childbearing potential and male participants with partners of childbearing potential must agree to use effective contraception throughout treatment and for 120 days after last dose.
+
+Exclusion Criteria:
+
+1. Histology of mixed small cell and non-small cell lung cancer, or predominantly non-squamous histology.
+2. Known sensitizing EGFR mutation, ALK rearrangement, ROS1 rearrangement, BRAF V600E mutation, or other actionable alteration for which an approved targeted therapy is the standard of care.
+3. Prior treatment with any PD-1, PD-L1, PD-L2, or CTLA-4 inhibitor.
+4. Active autoimmune disease requiring systemic immunosuppression within 2 years. Replacement therapy (e.g., thyroxine, insulin, physiologic corticosteroids) is permitted.
+5. History of pneumonitis requiring corticosteroids, or active pneumonitis.
+6. Active CNS metastases or carcinomatous meningitis. Participants with previously treated, asymptomatic CNS metastases stable for ≥4 weeks may be eligible.
+7. Active infection requiring systemic therapy.
+8. Known active HIV, HBV, or HCV infection.
+9. Live vaccine within 30 days of first dose.
+10. History of solid organ or allogeneic stem cell transplant.
+11. Pregnant or breastfeeding women.
+12. History of another malignancy within 3 years, except for adequately treated non-melanoma skin cancer or in situ disease.`,
+    expected: 'UNLIKELY',
+  },
+  {
+    nctId: 'NCT-LONG-04',
+    title: 'Multicenter Randomized Trial of Empagliflozin in Patients with Heart Failure with Preserved Ejection Fraction and Type 2 Diabetes',
+    outOfScope: true,
+    eligibility: `Inclusion Criteria:
+
+1. Adults aged 40 to 85 years at consent.
+2. Documented diagnosis of heart failure with preserved ejection fraction (HFpEF):
+    - Left ventricular ejection fraction (LVEF) ≥50% on echocardiogram within the past 12 months
+    - NYHA functional class II, III, or IV
+    - Elevated NT-proBNP ≥300 pg/mL (or ≥600 pg/mL if atrial fibrillation present)
+    - Structural heart disease on echocardiography (LV hypertrophy or left atrial enlargement) OR documented prior HF hospitalization
+3. Documented Type 2 diabetes mellitus (T2DM) per ADA criteria, with HbA1c 6.5% to 10.0% at screening.
+4. Stable background heart failure therapy for ≥4 weeks (diuretic if indicated; ACEi/ARB/ARNI per guideline; beta-blocker per guideline).
+5. eGFR ≥25 mL/min/1.73m^2 by CKD-EPI equation.
+6. Body mass index 20 to 45 kg/m^2.
+7. Able and willing to provide written informed consent and adhere to study procedures.
+
+Exclusion Criteria:
+
+1. Type 1 diabetes mellitus.
+2. History of diabetic ketoacidosis within 12 months.
+3. LVEF <50% on most recent echocardiogram.
+4. Acute decompensated heart failure requiring IV diuretics within 4 weeks of screening.
+5. Acute coronary syndrome, stroke, or transient ischemic attack within 90 days.
+6. Planned cardiac surgery, percutaneous coronary intervention, or device implantation within 90 days.
+7. Symptomatic hypotension or systolic blood pressure <100 mmHg at screening.
+8. Significant valvular heart disease (severe aortic stenosis, severe mitral regurgitation requiring surgery).
+9. Hypertrophic cardiomyopathy, infiltrative cardiomyopathy, or constrictive pericarditis.
+10. eGFR <25 mL/min/1.73m^2 or end-stage renal disease requiring dialysis.
+11. Known active malignancy requiring treatment within the past 12 months. Participants with a history of cancer who are disease-free for >12 months are eligible.
+12. Severe hepatic impairment (Child-Pugh C).
+13. Pregnancy or breastfeeding.
+14. Known hypersensitivity to SGLT2 inhibitors.
+15. Participation in another interventional clinical trial within 30 days.
+16. Life expectancy <12 months due to non-cardiovascular cause.`,
+    expected: 'UNLIKELY',
+  },
+]
+
+// Patient description presets for multilingual + edge-case validation. All
+// describe the same 58-year-old breast cancer patient in Boston, in different
+// languages and registers (formal, terse, etc.). Detail varies by preset: only
+// some state sex, and the "more detail" ones add menopausal status and chemo.
+export const USER_PRESETS = [
+  { id: 'en',     label: 'English',                  text: "I'm 58 years old with breast cancer in Boston" },
+  { id: 'en-2',   label: 'English (more detail)',    text: "58-year-old woman in Boston, postmenopausal, recently diagnosed with breast cancer, looking for post-chemo treatment options" },
+  { id: 'es',     label: 'Spanish (Español)',        text: 'Tengo 58 años, vivo en Boston y tengo cáncer de mama' },
+  { id: 'es-2',   label: 'Spanish (more detail)',    text: 'Soy mujer de 58 años, posmenopáusica, vivo en Boston. Me diagnosticaron cáncer de mama y busco opciones de tratamiento después de quimioterapia.' },
+  { id: 'zh',     label: 'Mandarin (中文)',          text: '我58岁，住在波士顿，患有乳腺癌' },
+  { id: 'ar',     label: 'Arabic (العربية)',        text: 'أنا امرأة عمري 58 عامًا أعيش في بوسطن ومصابة بسرطان الثدي' },
+  { id: 'pt',     label: 'Portuguese (Português)',   text: 'Tenho 58 anos, moro em Boston e tenho câncer de mama' },
+  { id: 'fr',     label: 'French (Français)',        text: "J'ai 58 ans, je vis à Boston et j'ai un cancer du sein" },
+  { id: 'terse',  label: 'Terse / fragments',        text: '58F, BC, Boston' },
+]
+
diff --git a/src/components/ClassificationHarness.jsx b/src/components/ClassificationHarness.jsx
new file mode 100644
index 0000000..0c2cb67
--- /dev/null
+++ b/src/components/ClassificationHarness.jsx
@@ -0,0 +1,552 @@
+import { useState, useEffect } from 'react'
+import { useNLP } from '../hooks/useNLP'
+import { useClassifier } from '../hooks/useClassifier'
+import { NLP_MODELS, resolveModelKey } from '../utils/nlpModels'
+import { DEFAULT_CLASSIFY_PROMPT, parseVerdict } from '../utils/classifyTrial'
+import { SAMPLE_TRIALS, USER_PRESETS } from './ClassificationHarness.fixtures'
+
+// Collapse a fixture's 3-class `expected` label to the binary scale used for
+// agreement scoring: 'POSSIBLE' is returned as 'LIKELY' (both = "show this
+// trial"); every other value is returned unchanged. The fixtures stay 3-class
+// (informationally rich) while the binary model output is evaluated correctly.
+function expectedBinary(expected) {
+  if (expected === 'POSSIBLE') return 'LIKELY'
+  return expected
+}
+
+const VERDICT_STYLES = {
+  LIKELY:     'bg-signal-good-bg text-signal-good',
+  POSSIBLE:   'bg-signal-warn-bg text-signal-warn',
+  UNLIKELY:   'bg-parchment-200 text-parchment-700',
+  PARSE_FAIL: 'bg-signal-bad-bg text-signal-bad',
+  PENDING:    'bg-parchment-100 text-parchment-700',
+}
+
+export default function ClassificationHarness() {
+  const [modelKey] = useState(() =>
+    resolveModelKey(typeof window !== 'undefined' ? window.location.search : '')
+  )
+  const model = NLP_MODELS[modelKey]
+  const { status, progress, error, load, webGPUSupported } = useNLP()
+  const { classifyOne, translateOne } = useClassifier()
+
+  const [userDesc, setUserDesc] = useState(USER_PRESETS[0].text)
+  const [promptTemplate, setPromptTemplate] = useState(DEFAULT_CLASSIFY_PROMPT)
+  const [trialsJson, setTrialsJson] = useState(JSON.stringify(SAMPLE_TRIALS, null, 2))
+  // Concurrency was a UI dropdown until we serialized at the hook level
+  // (WebLLM engine is single-threaded). Kept as a constant so the worker
+  // loop still controls fan-out at the harness level — the real
+  // serialization happens in useClassifier's promise chain.
+  const concurrency = 3
+  const [eligMax, setEligMax] = useState(1500)
+  const [translateFirst, setTranslateFirst] = useState(false)
+  const [translatedDesc, setTranslatedDesc] = useState(null)
+  const [productionMode, setProductionMode] = useState(true)
+  const [results, setResults] = useState([])
+  const [running, setRunning] = useState(false)
+  const [startT, setStartT] = useState(0)
+  const [, setTick] = useState(0)
+
+  // Lightweight ticker so elapsed time updates while a run is in flight.
+  useEffect(() => {
+    if (!running) return
+    const id = setInterval(() => setTick(t => t + 1), 250)
+    return () => clearInterval(id)
+  }, [running])
+
+  function getProgressLabel() {
+    if (!progress) return 'Loading model…'
+    return progress.text || `Loading model… ${Math.round((progress.progress ?? 0) * 100)}%`
+  }
+
+  async function run() {
+    let trials
+    try {
+      trials = JSON.parse(trialsJson)
+      if (!Array.isArray(trials)) throw new Error('Not an array')
+    } catch (e) {
+      alert('Trials JSON is invalid: ' + e.message)
+      return
+    }
+
+    setRunning(true)
+    setStartT(performance.now())
+    const initial = trials.map(trial => ({ trial, status: 'PENDING' }))
+    setResults(initial)
+    setTranslatedDesc(null)
+
+    // Translate user description to English once before classification, so the
+    // model anchors on a single language at inference time. Runs only once per
+    // batch — amortized cost across all N trials.
+    let effectiveUserDesc = userDesc
+    if (translateFirst) {
+      const translatePrompt = `Translate the following patient description into clear, clinical English. Preserve all medical and demographic facts (age, sex, condition, treatments, location). Do not add or remove information. Output ONLY the English translation, nothing else.
+
+Patient description: ${userDesc}
+
+English translation:`
+      try {
+        const { raw } = await translateOne(translatePrompt)
+        effectiveUserDesc = (raw || '').trim().replace(/^["']|["']$/g, '')
+        setTranslatedDesc(effectiveUserDesc)
+      } catch (e) {
+        alert('Translation failed: ' + (e?.message ?? 'unknown error'))
+        setRunning(false)
+        return
+      }
+    }
+
+    const queue = trials.map((trial, idx) => ({ idx, trial }))
+    const workersN = Math.min(concurrency, trials.length)
+
+    async function worker() {
+      while (queue.length) {
+        const { idx, trial } = queue.shift()
+        const elig = (trial.eligibility || '').slice(0, eligMax)
+        const prompt = promptTemplate
+          .replace('{{user}}', effectiveUserDesc)
+          .replace('{{title}}', trial.title || trial.briefTitle || '')
+          .replace('{{eligibility}}', elig)
+        try {
+          const { raw, latencyMs } = await classifyOne(prompt)
+          const parsed = parseVerdict(raw)
+          setResults(prev => {
+            const next = [...prev]
+            next[idx] = { trial, status: 'DONE', raw, latencyMs, ...parsed }
+            return next
+          })
+        } catch (err) {
+          setResults(prev => {
+            const next = [...prev]
+            next[idx] = {
+              trial,
+              status: 'DONE',
+              raw: '',
+              latencyMs: 0,
+              verdict: 'PARSE_FAIL',
+              reason: err?.message ?? 'classify error',
+            }
+            return next
+          })
+        }
+      }
+    }
+
+    await Promise.all(Array.from({ length: workersN }, worker))
+    setRunning(false)
+  }
+
+  function reset() {
+    setTrialsJson(JSON.stringify(SAMPLE_TRIALS, null, 2))
+    setResults([])
+  }
+
+  const [copyState, setCopyState] = useState('idle') // idle | copied | error
+  // Builds the markdown report for the current run and copies it to the
+  // clipboard, flashing 'copied' or 'error' on the button before going idle.
+  async function copyMarkdown() {
+    const stats = {
+      done: done.length, total: results.length, elapsed, avgLat, maxLat,
+      parseRate, parseFails, agreementPct, matches, withExpected: withExpected.length,
+    }
+    const report = buildMarkdownReport({
+      userDesc, translatedDesc, translateFirst, productionMode, hiddenCount,
+      promptTemplate, eligMax, modelLabel: model.label, results, stats,
+    })
+    let outcome = 'copied'
+    try {
+      await navigator.clipboard.writeText(report)
+    } catch {
+      // Clipboard access can be refused; show the failure instead of throwing.
+      outcome = 'error'
+    }
+    setCopyState(outcome)
+    // Failures stay visible a little longer than the success tick.
+    setTimeout(() => setCopyState('idle'), outcome === 'copied' ? 1800 : 2400)
+  }
+
+  // ───────── stats ─────────
+  const done = results.filter(r => r.status === 'DONE')
+  const lats = done.map(r => r.latencyMs).filter(n => n != null)
+  const avgLat = lats.length ? Math.round(lats.reduce((a, b) => a + b, 0) / lats.length) : 0
+  const maxLat = lats.length ? Math.round(Math.max(...lats)) : 0
+  const parseFails = done.filter(r => r.verdict === 'PARSE_FAIL').length
+  const parseRate = done.length ? Math.round(((done.length - parseFails) / done.length) * 100) : 0
+  const elapsed = startT ? ((performance.now() - startT) / 1000).toFixed(1) : '0.0'
+  // Production mode hides trials the CT.gov API would never return for the
+  // user's stated condition (e.g., melanoma trials in a breast-cancer search).
+  // The headline agreement % then reflects what users would actually see,
+  // not the model's behavior on stress-test inputs.
+  const inScope = (r) => !productionMode || !r.trial.outOfScope
+  const withExpected = done.filter(r => r.trial.expected && inScope(r))
+  const matches = withExpected.filter(r => r.verdict === expectedBinary(r.trial.expected)).length
+  const agreementPct = withExpected.length ? Math.round((matches / withExpected.length) * 100) : null
+  const hiddenCount = done.filter(r => r.trial.outOfScope).length
+
+  const canRun = status === 'ready' && !running
+
+  return (
+    <div className="max-w-[1200px] mx-auto px-6 py-7 pb-20">
+      <h1 className="font-serif font-semibold text-[28px] tracking-tight text-parchment-950 mb-1">
+        Classification harness
+      </h1>
+      <p className="text-[13px] text-parchment-700 max-w-[640px] leading-relaxed mb-6">
+        Validate the proposed Stage-1 classifier (LIKELY / POSSIBLE / UNLIKELY) against real
+        ClinicalTrials.gov payloads using the on-device {model.label}. Pass criteria from the
+        Handoff: parse rate ≥ 90%, avg latency &lt; 1.5s, agreement ≥ 80%.
+      </p>
+
+      <div className="bg-white border border-parchment-200 rounded-xl p-5 mb-4">
+        <div className="flex flex-wrap items-center justify-between gap-3">
+          <div>
+            <div className="font-mono text-[10px] uppercase tracking-[0.08em] text-iris-700 mb-1">model</div>
+            <div className="font-mono text-[12px] text-parchment-900">
+              {model.label} ({model.sizeLabel}) · status:{' '}
+              <strong className={status === 'ready' ? 'text-signal-good' : 'text-parchment-700'}>
+                {status}
+              </strong>
+              {status === 'downloading' && progress && (
+                <span className="text-parchment-500"> · {Math.round((progress.progress ?? 0) * 100)}%</span>
+              )}
+            </div>
+            {status === 'downloading' && (
+              <p className="font-mono text-[11px] text-parchment-700 mt-1">{getProgressLabel()}</p>
+            )}
+            {!webGPUSupported && (
+              <p className="text-[12px] text-signal-bad mt-1">WebGPU unavailable in this browser.</p>
+            )}
+            {error && <p className="text-[12px] text-signal-bad mt-1">{error}</p>}
+          </div>
+          {status !== 'ready' && status !== 'downloading' && webGPUSupported && (
+            <button
+              type="button"
+              onClick={() => load(model.id, { isThinking: model.isThinking, chatOpts: model.chatOpts })}
+              className="bg-iris-600 text-white px-4 py-2 rounded-md text-[13px] font-semibold hover:bg-iris-700"
+            >
+              Load model
+            </button>
+          )}
+        </div>
+      </div>
+
+      <div className="bg-white border border-parchment-200 rounded-xl p-5 mb-4">
+        <h2 className="font-serif font-semibold text-base mb-3">Inputs</h2>
+        <div className="grid grid-cols-1 md:grid-cols-2 gap-4">
+          <div>
+            <div className="flex items-center justify-between mb-1.5 gap-2 flex-wrap">
+              <label className="font-mono text-[10px] font-semibold uppercase tracking-[0.08em] text-iris-700">
+                User description
+              </label>
+              <select
+                value={USER_PRESETS.find(p => p.text === userDesc)?.id ?? 'custom'}
+                onChange={e => {
+                  const preset = USER_PRESETS.find(p => p.id === e.target.value)
+                  if (preset) setUserDesc(preset.text)
+                }}
+                className="text-[11px] px-2 py-1 border border-parchment-300 rounded bg-white text-parchment-700"
+                title="Swap the patient description to test multilingual handling and edge cases"
+              >
+                {!USER_PRESETS.some(p => p.text === userDesc) && (
+                  <option value="custom">— custom —</option>
+                )}
+                {USER_PRESETS.map(p => (
+                  <option key={p.id} value={p.id}>{p.label}</option>
+                ))}
+              </select>
+            </div>
+            <textarea
+              rows={3}
+              value={userDesc}
+              onChange={e => setUserDesc(e.target.value)}
+              dir={userDesc.match(/[؀-ۿ]/) ? 'rtl' : 'ltr'}
+              className="w-full text-[13px] px-3 py-2.5 border border-parchment-300 rounded-lg bg-parchment-50 text-parchment-900 resize-y focus:outline-none focus:ring-2 focus:ring-iris-500"
+            />
+          </div>
+          <div>
+            <label className="font-mono text-[10px] font-semibold uppercase tracking-[0.08em] text-iris-700 mb-1.5 block">
+              Classify prompt template
+            </label>
+            <textarea
+              rows={6}
+              value={promptTemplate}
+              onChange={e => setPromptTemplate(e.target.value)}
+              className="w-full font-mono text-[12px] px-3 py-2.5 border border-parchment-300 rounded-lg bg-parchment-50 text-parchment-900 resize-y focus:outline-none focus:ring-2 focus:ring-iris-500"
+            />
+          </div>
+        </div>
+
+        <div className="mt-4">
+          <label className="font-mono text-[10px] font-semibold uppercase tracking-[0.08em] text-iris-700 mb-1.5 block">
+            Trials (JSON array — fixture loaded by default)
+          </label>
+          <textarea
+            rows={10}
+            value={trialsJson}
+            onChange={e => setTrialsJson(e.target.value)}
+            className="w-full font-mono text-[11.5px] px-3 py-2.5 border border-parchment-300 rounded-lg bg-parchment-50 text-parchment-900 resize-y focus:outline-none focus:ring-2 focus:ring-iris-500"
+          />
+        </div>
+
+        <div className="flex flex-wrap items-center gap-3 mt-4">
+          <button
+            type="button"
+            disabled={!canRun}
+            onClick={run}
+            className="bg-iris-600 text-white px-5 py-2.5 rounded-lg text-[13px] font-semibold hover:bg-iris-700 disabled:opacity-50 disabled:cursor-not-allowed"
+          >
+            {running ? `Running… (${done.length}/${results.length})` : 'Run classification'}
+          </button>
+          <button
+            type="button"
+            onClick={reset}
+            disabled={running}
+            className="border border-parchment-300 text-parchment-900 px-4 py-2 rounded-lg text-[12px] hover:bg-parchment-100 disabled:opacity-50"
+          >
+            Reset trials
+          </button>
+          <span
+            className="inline-flex items-center gap-2 text-[11px] text-parchment-700"
+            title="WebLLM's MLCEngine is single-threaded — parallel inference clobbers state. Requests serialize through a hook-level promise chain regardless of caller concurrency."
+          >
+            <span className="font-mono text-[10px] uppercase tracking-[0.04em]">execution</span>
+            serial (engine constraint)
+          </span>
+          <label
+            className="inline-flex items-center gap-1.5 text-[12px] text-parchment-900 cursor-pointer"
+            title="Translate the patient description to English once before classification. Runs only once per batch — adds ~1s amortized."
+          >
+            <input
+              type="checkbox"
+              checked={translateFirst}
+              onChange={e => setTranslateFirst(e.target.checked)}
+              disabled={running}
+              className="accent-iris-500"
+            />
+            translate to English first
+          </label>
+          <label
+            className="inline-flex items-center gap-1.5 text-[12px] text-parchment-900 cursor-pointer"
+            title="Production mode: agreement % only counts trials the CT.gov API would actually return for the patient's condition. Out-of-scope stress-test trials (different cancers, unrelated diseases) are still classified and shown but excluded from the headline."
+          >
+            <input
+              type="checkbox"
+              checked={productionMode}
+              onChange={e => setProductionMode(e.target.checked)}
+              className="accent-iris-500"
+            />
+            production-realistic agreement
+          </label>
+          <label className="inline-flex items-center gap-2 text-[12px] text-parchment-700">
+            Eligibility max chars
+            <input
+              type="number"
+              min={200}
+              max={8000}
+              step={100}
+              value={eligMax}
+              onChange={e => setEligMax(parseInt(e.target.value, 10) || 1500)}
+              disabled={running}
+              className="w-[90px] px-2 py-1 text-[12px] border border-parchment-300 rounded bg-white"
+            />
+          </label>
+        </div>
+
+        {translatedDesc && (
+          <div className="mt-3 px-3 py-2.5 bg-iris-50 border border-iris-100 rounded-lg text-[12px] text-parchment-900 leading-relaxed">
+            <span className="font-mono text-[10px] uppercase tracking-[0.08em] text-iris-700 mr-2">translated</span>
+            {translatedDesc}
+          </div>
+        )}
+
+        {(running || done.length > 0) && (
+          <div className="flex flex-wrap items-center gap-4 font-mono text-[11px] text-parchment-700 mt-3">
+            <span><strong className="text-parchment-950">{done.length} / {results.length}</strong> done</span>
+            <span>elapsed <strong className="text-parchment-950">{elapsed}s</strong></span>
+            <span>avg latency <strong className="text-parchment-950">{avgLat}ms</strong></span>
+            <span>max latency <strong className="text-parchment-950">{maxLat}ms</strong></span>
+            <span>parse rate <strong className="text-parchment-950">{parseRate}%</strong></span>
+            <span>parse fails <strong className="text-parchment-950">{parseFails}</strong></span>
+            {done.length > 0 && !running && (
+              <button
+                type="button"
+                onClick={copyMarkdown}
+                className="ml-auto inline-flex items-center gap-1.5 border border-iris-300 text-iris-700 hover:bg-iris-50 px-2.5 py-1 rounded text-[11px] transition-colors"
+                title="Copy a shareable markdown summary of this run to your clipboard"
+              >
+                {copyState === 'copied' ? '✓ copied' : copyState === 'error' ? 'copy failed' : 'copy results as markdown'}
+              </button>
+            )}
+          </div>
+        )}
+      </div>
+
+      <div className="bg-white border border-parchment-200 rounded-xl p-5 mb-4">
+        <h2 className="font-serif font-semibold text-base mb-3">Results</h2>
+        {results.length === 0 ? (
+          <p className="text-parchment-500 italic text-[13px] py-6 text-center">
+            No results yet — click <strong>Run classification</strong>.
+          </p>
+        ) : (
+          <ResultsTable rows={results} />
+        )}
+        {agreementPct != null && !running && (
+          <div className="font-mono text-[11px] text-parchment-700 mt-3 px-3 py-2.5 bg-iris-50 border border-iris-100 rounded-lg leading-relaxed">
+            <strong className="text-iris-700">Agreement with expected:</strong>{' '}
+            {matches} / {withExpected.length} ({agreementPct}%)
+            {productionMode && hiddenCount > 0 && (
+              <span className="text-parchment-700">
+                {' '}— {hiddenCount} out-of-scope trial{hiddenCount !== 1 ? 's' : ''} excluded
+                (the CT.gov API would not return them for this condition).
+              </span>
+            )}
+            {!productionMode && (
+              <span className="text-parchment-700">
+                {' '}— includes out-of-scope stress-test trials. Toggle <em>production-realistic</em> for the user-facing number.
+              </span>
+            )}
+          </div>
+        )}
+      </div>
+
+      <details className="text-[12px] text-parchment-700">
+        <summary className="cursor-pointer font-mono text-iris-700">Pass criteria (from Handoff)</summary>
+        <ul className="mt-2 ml-4 list-disc space-y-1">
+          <li>Parse rate ≥ 90% on 50+ real trials</li>
+          <li>Avg latency &lt; 1.5s per trial on a mid-range laptop</li>
+          <li>Agreement ≥ 80% on a labeled held-out set</li>
+          <li>No catastrophic UNLIKELY false-negatives (a viable trial ranked as UNLIKELY)</li>
+        </ul>
+      </details>
+    </div>
+  )
+}
+
+// Renders one harness run as a markdown document: the run settings, a stats
+// table, one row per finished trial and the prompt template in a <details>.
+// `stats` holds figures the caller has already computed; nothing is recomputed
+// here except the per-row ✓/✗. Returns the document as a single string.
+function buildMarkdownReport({ userDesc, translatedDesc, translateFirst, productionMode, hiddenCount, promptTemplate, eligMax, modelLabel, results, stats }) {
+  // Table-cell sanitiser: escape pipes and flatten every kind of line break
+  // (\n, \r\n, bare \r) so cell text can neither add a column nor split a row.
+  const escape = (s) => String(s ?? '').replace(/\|/g, '\\|').replace(/\r\n?|\n/g, ' ').trim()
+  const truncate = (s, n) => {
+    const t = escape(s)
+    return t.length > n ? t.slice(0, n - 1) + '…' : t
+  }
+
+  const lines = [
+    '# Classification harness run',
+    '',
+    `**Model:** ${modelLabel}`,
+    `**User description:** ${userDesc}`,
+    `**Translate-first:** ${translateFirst ? 'ON' : 'off'}`,
+  ]
+  if (translatedDesc) lines.push(`**Translated to:** ${translatedDesc}`)
+  lines.push(`**Eligibility max chars:** ${eligMax}`)
+  lines.push('', '## Stats', '')
+  lines.push(`| Metric | Value |`, `|---|---|`)
+  lines.push(`| Done | ${stats.done} / ${stats.total} |`)
+  lines.push(`| Elapsed | ${stats.elapsed}s |`)
+  lines.push(`| Avg latency | ${stats.avgLat}ms |`)
+  lines.push(`| Max latency | ${stats.maxLat}ms |`)
+  lines.push(`| Parse rate | ${stats.parseRate}% (${stats.parseFails} fails) |`)
+  if (stats.agreementPct != null) {
+    const note = productionMode
+      ? ` — ${hiddenCount || 0} out-of-scope trial(s) excluded`
+      : ' — includes out-of-scope stress-test trials'
+    lines.push(`| Agreement | ${stats.matches} / ${stats.withExpected} (${stats.agreementPct}%)${note} |`)
+  }
+
+  lines.push('', '## Results', '')
+  lines.push(`| Trial | NCT | Verdict | Expected | Match | Latency | Reason / Raw |`)
+  lines.push(`|---|---|---|---|---|---|---|`)
+  // One row per finished trial; rows that are not DONE are left out.
+  for (const r of results) {
+    if (r.status !== 'DONE') continue
+    const v = r.verdict || 'PARSE_FAIL'
+    const exp = r.trial.expected || '—'
+    // Judge the row with expectedBinary, the helper the on-screen table and the
+    // headline agreement figure use, rather than a local copy of the mapping.
+    const match = r.trial.expected ? (r.verdict === expectedBinary(r.trial.expected) ? '✓' : '✗') : ''
+    const latency = r.latencyMs != null ? `${Math.round(r.latencyMs)}ms` : '—'
+    const reasonOrRaw = r.reason && r.reason !== '(no reason)' ? r.reason : `raw: ${r.raw || '—'}`
+    // Verdict and expected label are escaped too; `expected` may be free text from the trials JSON.
+    lines.push(`| ${truncate(r.trial.title || r.trial.briefTitle || r.trial.nctId, 80)} | ${escape(r.trial.nctId || '')} | ${escape(v)} | ${escape(exp)} | ${match} | ${latency} | ${truncate(reasonOrRaw, 140)} |`)
+  }
+
+  // The prompt goes last, folded away inside a <details> element.
+  lines.push('', '<details>', '<summary>Prompt template used</summary>', '')
+  lines.push('```', promptTemplate, '```', '</details>')
+  return lines.join('\n')
+}
+
+// Read-only table of harness rows: trial title and NCT id, verdict pill,
+// latency, the model's reason plus raw output, and the expected label with ✓/✗.
+function ResultsTable({ rows }) {
+  const headCell = 'text-left font-mono text-[10px] font-semibold uppercase tracking-[0.08em] text-parchment-700 px-2.5 py-2 border-b border-parchment-200'
+  const bodyCell = 'px-2.5 py-3 border-b border-parchment-100 align-top'
+  // [heading, fixed width]; the reason column has no width and takes the rest.
+  const columns = [
+    ['Trial', '38%'],
+    ['Verdict', '14%'],
+    ['Latency', '12%'],
+    ['Raw output / reason', null],
+    ['Expected', '12%'],
+  ]
+  const renderRow = (r, i) => {
+    const { trial } = r
+    const pending = r.status === 'PENDING'
+    const verdict = pending ? 'PENDING' : (r.verdict || 'PARSE_FAIL')
+    // The cell keeps the fixture's original 3-class label for reading; the
+    // ✓/✗ is judged on the binary mapping (POSSIBLE counts as LIKELY).
+    let mark = ''
+    if (r.verdict && trial.expected) {
+      mark = r.verdict === expectedBinary(trial.expected) ? '✓' : '✗'
+    }
+    const markColor = { '✓': 'text-signal-good', '✗': 'text-signal-bad' }[mark] ?? 'text-parchment-500'
+    return (
+      <tr key={i}>
+        <td className={bodyCell}>
+          <div className="font-serif font-semibold text-parchment-950 text-[13.5px] leading-snug">
+            {trial.title || trial.briefTitle || trial.nctId}
+          </div>
+          <div className="font-mono text-[10.5px] text-parchment-500 mt-0.5">{trial.nctId || ''}</div>
+        </td>
+        <td className={bodyCell}>
+          <span className={`inline-flex items-center gap-1.5 font-mono text-[11px] font-semibold px-2 py-0.5 rounded-full tracking-[0.04em] ${VERDICT_STYLES[verdict] ?? ''}`}>
+            {verdict}
+          </span>
+        </td>
+        <td className={`${bodyCell} font-mono text-[12px] text-parchment-700`}>
+          {r.latencyMs != null ? `${Math.round(r.latencyMs)}ms` : '—'}
+        </td>
+        <td className={bodyCell}>
+          <div className="text-[12.5px] text-parchment-900 leading-relaxed">{r.reason || '—'}</div>
+          {r.raw && r.raw !== r.reason && (
+            <div className="font-mono text-[11px] text-parchment-700 mt-1 whitespace-pre-wrap break-words max-w-[380px]">
+              raw: {r.raw}
+            </div>
+          )}
+        </td>
+        <td className={`${bodyCell} font-mono text-[11px] text-parchment-700`}>
+          {trial.expected || '—'}
+          {mark && <span className={`ml-1.5 font-semibold ${markColor}`}>{mark}</span>}
+        </td>
+      </tr>
+    )
+  }
+  return (
+    <table className="w-full border-collapse text-[13px]">
+      <thead>
+        <tr>
+          {columns.map(([heading, width]) => (
+            <th key={heading} className={headCell} style={width ? { width } : undefined}>
+              {heading}
+            </th>
+          ))}
+        </tr>
+      </thead>
+      <tbody>{rows.map(renderRow)}</tbody>
+    </table>
+  )
+}
diff --git a/src/components/NaturalLanguageInput.test.jsx b/src/components/NaturalLanguageInput.test.jsx
index ca59891..a9fd691 100644
--- a/src/components/NaturalLanguageInput.test.jsx
+++ b/src/components/NaturalLanguageInput.test.jsx
@@ -158,3 +158,41 @@ describe('NaturalLanguageInput — error state', () => {
     expect(screen.getByText(/try again/i)).toBeInTheDocument()
   })
 })
+
+describe('NaturalLanguageInput — queued submit during download', () => {
+  // Locks in the typing-while-loading flow: a user can hit Find trials while
+  // the model is still downloading; the intent is held until status flips to
+  // 'ready' and then auto-fires. Indirect smoke test for the StrictMode
+  // listener fix in useNLP — if the listener didn't re-attach after the dev
+  // double-invoke, the real-world status would never reach 'ready' and the
+  // drain effect (deps [status, pendingSubmit]) would never fire.
+  it('queues submit while downloading, fires extract once status flips to ready', async () => {
+    const extract = vi.fn().mockResolvedValue({
+      condition: 'breast cancer', location: null, age: 58, sex: 'FEMALE',
+      status: 'RECRUITING', phases: [],
+    })
+    useNLP.mockReturnValue({ ...baseHook, status: 'downloading', extract }) // mocked hook: model not ready yet
+    localStorage.setItem('iris_nlp_enabled', 'true')
+
+    const onExtract = vi.fn()
+    const { rerender } = render(<NaturalLanguageInput onExtract={onExtract} />)
+    fireEvent.click(screen.getByRole('button', { name: /describe in your own words/i }))
+    // Type a description into the natural-language textbox.
+    fireEvent.change(screen.getByRole('textbox', { name: /natural language search/i }), {
+      target: { value: '58 with breast cancer' },
+    })
+
+    // Submit while downloading — should queue, NOT fire extract yet.
+    fireEvent.click(screen.getByRole('button', { name: /Run when ready/i }))
+    expect(extract).not.toHaveBeenCalled()
+    expect(screen.getByRole('button', { name: /Queued/i })).toBeInTheDocument()
+
+    // Worker reports ready. In production this comes via the listener that
+    // the StrictMode fix ensures stays attached after the cleanup-remount.
+    useNLP.mockReturnValue({ ...baseHook, status: 'ready', extract })
+    rerender(<NaturalLanguageInput onExtract={onExtract} />)
+    // extract must receive the queued text unchanged, and onExtract must fire.
+    await waitFor(() => expect(extract).toHaveBeenCalledWith('58 with breast cancer'))
+    await waitFor(() => expect(onExtract).toHaveBeenCalled())
+  })
+})
diff --git a/src/components/ResultCard.jsx b/src/components/ResultCard.jsx
index a520860..e9cc060 100644
--- a/src/components/ResultCard.jsx
+++ b/src/components/ResultCard.jsx
@@ -36,6 +36,36 @@ function SectionLabel({ children, pane }) {
   )
 }
 
+// Status strip for the two-stage on-device pipeline. ResultCard mounts it only
+// in pane (detail) view, since the list row already carries a fit dot. It
+// names the stage currently in flight so the still-empty content area below
+// reads as "working" rather than "broken".
+function PipelineCaption({ stage, progress }) {
+  const classifying = stage === 'classifying'
+  // Any stage other than the two in-flight ones renders nothing.
+  if (!classifying && stage !== 'awaiting-summary') return null
+
+  // Only the box tint, text colour and message differ between the two stages.
+  const boxTone = classifying ? 'bg-iris-50 border border-iris-100' : 'bg-parchment-100 border border-parchment-200'
+
+  return (
+    <div className={`mb-5 flex items-center gap-2 px-3 py-2 rounded-lg ${boxTone}`}>
+      <span className="iris-shimmer-text inline-block w-2 h-2 rounded-full" aria-hidden="true">&nbsp;</span>
+      {classifying ? (
+        <span className="font-mono text-[11px] text-iris-700">
+          evaluating fit
+          {progress && progress.total > 0 && ` · ${progress.done} of ${progress.total}`}
+          <span className="text-parchment-700"> · plain-language summary will follow</span>
+        </span>
+      ) : (
+        <span className="font-mono text-[11px] text-parchment-700">
+          generating plain-language summary…
+        </span>
+      )}
+    </div>
+  )
+}
+
 function MetaLine({ trial, nearest, pane }) {
   const sep = (
     <span aria-hidden="true" className={pane ? 'text-parchment-300' : 'text-parchment-500'}>
@@ -80,6 +110,8 @@ export default function ResultCard({
   inputLanguage = 'en',
   simplificationSupported = true,
   pane = false,
+  pipelineStage = null, // 'classifying' | 'awaiting-summary' | null
+  classifyProgress = null, // { done, total }
 }) {
   const nearest = nearestLocation(trial.locations, coords)
   const wrapperClass = pane
@@ -87,11 +119,12 @@ export default function ResultCard({
     : 'bg-white border border-parchment-400 rounded-lg p-5 mb-3 max-w-3xl'
 
   const sumState = simplification?.summarize
-  const fitState = simplification?.fit
-
+  // fitState/showFit removed when the "Why this might or might not fit you"
+  // section was dropped — Gemma 2B's accuracy on the fit narrative wasn't
+  // reliable enough to ship. Re-introduce both if the fit section comes
+  // back behind a fine-tuned model.
   const showPlainLanguage = sumState && sumState.status !== 'error'
   const showFallbackHint = sumState?.status === 'error'
-  const showFit = fitState && fitState.status !== 'error' && fitState.text
 
   return (
     <article className={wrapperClass}>
@@ -113,6 +146,10 @@ export default function ResultCard({
 
       <MetaLine trial={trial} nearest={nearest} pane={pane} />
 
+      {pane && pipelineStage && (
+        <PipelineCaption stage={pipelineStage} progress={classifyProgress} />
+      )}
+
       {showPlainLanguage && (
         <div className={pane ? 'mb-4' : 'mb-3'}>
           <div className={pane ? 'mb-4' : ''}>
@@ -135,16 +172,11 @@ export default function ResultCard({
             </div>
           )}
 
-          {showFit && (
-            <div className={pane ? 'mb-4' : ''}>
-              <SectionLabel pane={pane}>Why this might or might not fit you</SectionLabel>
-              <p className={pane
-                ? 'text-[15px] text-parchment-900 leading-[1.6] whitespace-pre-wrap'
-                : 'text-sm text-parchment-900 leading-relaxed mb-3 whitespace-pre-wrap'}>
-                {fitState.text}
-              </p>
-            </div>
-          )}
+          {/* "Why this might or might not fit you" intentionally omitted —
+              Gemma 2B's accuracy on the fit narrative isn't reliable
+              enough to ship. The TriageRow fit dot (driven by the
+              classifier) is the safer signal. The DoctorDisclaimer
+              below renders unconditionally to set expectations. */}
 
           {(sumState.status === 'queued' || sumState.status === 'streaming') && (
             <p className="font-mono text-[11px] text-parchment-700 italic mb-2">
@@ -224,8 +256,26 @@ export default function ResultCard({
         </p>
       )}
 
+      {pane && (
+        <details className="mt-6 mb-2 px-4 py-3 rounded-lg bg-iris-50 border border-iris-100 group">
+          <summary className="cursor-pointer list-none text-[13px] text-parchment-900 leading-relaxed select-none">
+            <span className="font-semibold text-iris-700">Check with your doctor when exploring treatment options</span>
+            {' '}— this AI summary uses plain language to explain the treatment but can miss
+            eligibility details.
+            <span className="font-mono text-[11px] text-iris-700 ml-2 opacity-70 group-open:hidden">
+              why?
+            </span>
+          </summary>
+          <p className="mt-3 text-[13px] text-parchment-900 leading-relaxed">
+            The plain-language summary above was generated on your device by a small AI model. It
+            can miss or misstate who qualifies for a trial. Your care team has your full medical
+            picture and can confirm whether this one actually fits.
+          </p>
+        </details>
+      )}
+
       {pane ? (
-        <div className="mt-6 pt-5 border-t border-parchment-200 flex flex-col gap-1.5 text-[13px]">
+        <div className="mt-4 pt-5 border-t border-parchment-200 flex flex-col gap-1.5 text-[13px]">
           <div className="font-mono text-[11px] text-parchment-700 mb-1">contact</div>
           {trial.contact.phone && (
             <span className="text-parchment-900">{trial.contact.phone}</span>
diff --git a/src/components/ResultCard.test.jsx b/src/components/ResultCard.test.jsx
index fdc91e3..99326c5 100644
--- a/src/components/ResultCard.test.jsx
+++ b/src/components/ResultCard.test.jsx
@@ -140,22 +140,19 @@ describe('ResultCard — Phase 3 simplification', () => {
     expect(screen.getByText(/Plain-language version unavailable/i)).toBeInTheDocument()
   })
 
-  it('renders the fit paragraph when fit state is complete', () => {
+  // The "Why this might or might not fit you" section was removed because
+  // Gemma 2B's accuracy on the fit narrative wasn't reliable enough to
+  // ship — it occasionally flipped disease stage or treatment history.
+  // The TriageRow fit dot (driven by the binary classifier in
+  // useClassifier) is the safer signal. The simplifier still computes
+  // assess_fit when called; ResultCard just no longer renders it.
+  it('does not render the fit paragraph even when fit state is complete', () => {
     const simplification = {
       summarize: { status: 'complete', summary: 'Sum.', eligibility: 'Elig.', error: null },
       fit: { status: 'complete', text: 'This may fit you because…', error: null },
     }
     render(<ResultCard trial={trial} coords={null} simplification={simplification} />)
-    expect(screen.getByText(/Why this might or might not fit you/i)).toBeInTheDocument()
-    expect(screen.getByText('This may fit you because…')).toBeInTheDocument()
-  })
-
-  it('does not render fit section when fit is in error', () => {
-    const simplification = {
-      summarize: { status: 'complete', summary: 'Sum.', eligibility: 'Elig.', error: null },
-      fit: { status: 'error', text: '', error: 'failed' },
-    }
-    render(<ResultCard trial={trial} coords={null} simplification={simplification} />)
     expect(screen.queryByText(/Why this might or might not fit you/i)).not.toBeInTheDocument()
+    expect(screen.queryByText('This may fit you because…')).not.toBeInTheDocument()
   })
 })
diff --git a/src/components/ResultsList.jsx b/src/components/ResultsList.jsx
index e0baa05..12813dd 100644
--- a/src/components/ResultsList.jsx
+++ b/src/components/ResultsList.jsx
@@ -1,7 +1,12 @@
-import { useEffect, useMemo, useState } from 'react'
+import { useEffect, useMemo, useRef, useState } from 'react'
 import { useGeocode } from '../hooks/useGeocode'
 import { useClinicalTrials } from '../hooks/useClinicalTrials'
 import { useSimplifier } from '../hooks/useSimplifier'
+import { useNLP } from '../hooks/useNLP'
+import { useClassifier } from '../hooks/useClassifier'
+import { useIsMobile } from '../hooks/useIsMobile'
+import { NLP_MODELS } from '../utils/nlpModels'
+import { buildClassifyPrompt, parseVerdict } from '../utils/classifyTrial'
 import ResultCard from './ResultCard'
 import TriageRow from './TriageRow'
 import MobileSheet from './MobileSheet'
@@ -11,22 +16,31 @@ import {
   SUPPORTED_SIMPLIFICATION_LANGUAGES,
 } from '../utils/detectInputLanguage'
 
+const NLP_CONSENT_KEY = 'iris_nlp_enabled'
+
+// Stage-1 classification is wired end-to-end (worker, hook, harness) but
+// not yet surfaced in the in-app results UI. Reason: without sort wiring
+// the fit dots don't drive any user-visible behavior — they're just
+// decoration. The harness at ?test=classify still uses the full pipeline
+// for prompt iteration and validation. Flip this to true once "Best fit"
+// sort is wired so the dots become actionable.
+const ENABLE_CLASSIFY_IN_RESULTS = false
+
+// Build a synthetic patient description from extracted fields when the user
+// came in via structured form but had previously used NL (so consent exists).
+function patientDescFromFields(fields) {
+  const { age, sex, condition, location } = fields || {}
+  return [
+    age != null && `${age}-year-old`,
+    sex && sex !== 'ALL' && sex.toLowerCase(),
+    condition && `with ${condition}`,
+    location && `in ${location}`,
+  ].filter(Boolean).join(' ') || null
+}
+
 const EAGER_BATCH_SIZE = 5
-const MOBILE_BREAKPOINT_PX = 820
 const LIST_WIDTH_PX = 400
 
-function useIsMobile() {
-  const [isMobile, setIsMobile] = useState(() =>
-    typeof window !== 'undefined' && window.innerWidth <= MOBILE_BREAKPOINT_PX
-  )
-  useEffect(() => {
-    const onResize = () => setIsMobile(window.innerWidth <= MOBILE_BREAKPOINT_PX)
-    window.addEventListener('resize', onResize)
-    return () => window.removeEventListener('resize', onResize)
-  }, [])
-  return isMobile
-}
-
 export default function ResultsList({ searchParams, modelKey, userDescription, extractedFields }) {
   // Phase 3 simplification only ships for English and Spanish — those are
   // the languages we've verified the local model produces accurately.
@@ -58,13 +72,73 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
     extractedFields,
   })
 
-  const allTrials = data?.pages.flatMap(p => p.trials) ?? []
+  // Memoized so effect dep arrays comparing against allTrials don't churn
+  // every render — react-query returns the same `data` ref while data is
+  // unchanged, so memo identity is stable across non-data renders.
+  const allTrials = useMemo(
+    () => data?.pages.flatMap(p => p.trials) ?? [],
+    [data]
+  )
 
   const isMobile = useIsMobile()
   const [selectedNctId, setSelectedNctId] = useState(null)
   const [sheetOpen, setSheetOpen] = useState(false)
   const [compareSet, setCompareSet] = useState(() => new Set())
 
+  // ─── Stage-1 classification ───────────────────────────────────────
+  // Only fires when the user previously consented to the on-device model
+  // (iris_nlp_enabled localStorage key, set during NL flow). Structured-
+  // form-only sessions skip classification entirely — no auto-load,
+  // no covert worker initialization. Verdicts surface as fit dots in
+  // TriageRow + an "evaluating fit · X of N" caption in the toolbar.
+  const nlp = useNLP()
+  const { classifyOne } = useClassifier()
+  const [classifications, setClassifications] = useState(new Map())
+  const [classifyProgress, setClassifyProgress] = useState({ done: 0, total: 0 })
+  const classifiedRef = useRef(new Set())
+  const cancelClassifyRef = useRef(null)
+
+  const consented = useMemo(() => {
+    try { return localStorage.getItem(NLP_CONSENT_KEY) === 'true' } catch { return false }
+  }, [])
+  const patientDesc = useMemo(
+    () => userDescription || patientDescFromFields(extractedFields),
+    [userDescription, extractedFields]
+  )
+  const canClassify = ENABLE_CLASSIFY_IN_RESULTS && consented && nlp.webGPUSupported && Boolean(patientDesc)
+
+  // Idempotent: worker fast-returns 'ready' if engine already loaded
+  // (e.g. NL extraction loaded it earlier this session). Destructure
+  // load() out of nlp so we can list it in deps directly — `nlp` itself
+  // is a fresh object on every render (useNLP doesn't memoize its
+  // return), and listing the whole hook would re-fire the effect on
+  // every render even when nothing relevant changed.
+  const nlpLoad = nlp.load
+  useEffect(() => {
+    if (!canClassify) return
+    if (nlp.status !== 'idle') return
+    const model = NLP_MODELS[modelKey] ?? NLP_MODELS.gemma
+    nlpLoad(model.id, { isThinking: model.isThinking, chatOpts: model.chatOpts })
+  }, [canClassify, nlp.status, modelKey, nlpLoad])
+
+  // Reset classification state when EITHER the search params OR the patient
+  // description changes. Including patientDesc handles the case where a user
+  // hits "Find trials" again with a refined prompt that happens to extract
+  // to the same condition: the API result set may be cached (same trials)
+  // but the verdicts are now stale w.r.t. the new patient description, so
+  // classifications + classifiedRef must be wiped and the in-flight batch
+  // cancelled so the next pass re-classifies against the new patient.
+  // Also resets the simplifier so any in-flight summary stops competing
+  // with the re-classification pass.
+  useEffect(() => {
+    classifiedRef.current = new Set()
+    setClassifications(new Map())
+    setClassifyProgress({ done: 0, total: 0 })
+    if (cancelClassifyRef.current) cancelClassifyRef.current()
+    simplifier.cancelPending()
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [searchParams, patientDesc])
+
   function toggleCompare(nctId) {
     setCompareSet(prev => {
       const next = new Set(prev)
@@ -93,22 +167,82 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
     if (isMobile) setSheetOpen(true)
   }
 
-  // Fire when the result set changes — keyed on the first 5 NCT IDs.
-  // Using searchParams as the key would fire too early (before data arrives);
-  // using allTrials would re-fire on every pagination append.
+  // Classify newly-arrived trials. Pagination appends → classify only new
+  // NCTs. Engine-not-loaded check is via nlp.status !== 'ready'.
+  const trialKeyAll = allTrials.map(t => t.nctId).join(',')
+  useEffect(() => {
+    if (!canClassify || nlp.status !== 'ready' || !patientDesc) return
+    const newTrials = allTrials.filter(t => !classifiedRef.current.has(t.nctId))
+    if (newTrials.length === 0) return
+    for (const t of newTrials) classifiedRef.current.add(t.nctId)
+
+    setClassifyProgress(prev => ({ done: prev.done, total: prev.total + newTrials.length }))
+
+    let cancelled = false
+    cancelClassifyRef.current = () => { cancelled = true }
+    ;(async () => {
+      for (const trial of newTrials) {
+        if (cancelled) return
+        try {
+          const prompt = buildClassifyPrompt(patientDesc, trial)
+          const { raw } = await classifyOne(prompt)
+          const parsed = parseVerdict(raw)
+          if (cancelled) return
+          setClassifications(prev => {
+            const next = new Map(prev)
+            next.set(trial.nctId, { status: 'done', ...parsed, raw })
+            return next
+          })
+        } catch (err) {
+          if (cancelled) return
+          setClassifications(prev => {
+            const next = new Map(prev)
+            next.set(trial.nctId, { status: 'done', verdict: 'PARSE_FAIL', reason: err?.message ?? 'classify error' })
+            return next
+          })
+        } finally {
+          if (!cancelled) {
+            setClassifyProgress(prev => ({ ...prev, done: prev.done + 1 }))
+          }
+        }
+      }
+    })()
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [canClassify, nlp.status, patientDesc, trialKeyAll])
+
+  // Reset the simplifier when the result set changes (new search). The
+  // per-trial enqueue happens below in the selected-trial effect.
   const eagerKey = allTrials.slice(0, EAGER_BATCH_SIZE).map(t => t.nctId).join(',')
   useEffect(() => {
     simplifier.cancelPending()
     simplifier.resetCache()
-    if (allTrials.length === 0) return
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [eagerKey])
+
+  // Per Handoff Phase 3 step 6: stage-2 simplification only fires for the
+  // currently-selected trial. Critically, it WAITS for stage-1
+  // classification to finish first — otherwise both compete for the
+  // single-threaded worker, the classifier appears to stall, and the
+  // simplifier (running first) produces noisier output under contention.
+  // For structured-form-only sessions canClassify is false and
+  // classifyProgress.total stays 0, so the gate falls through to "true"
+  // and simplification runs immediately on selection.
+  const classifyDone = !canClassify || (
+    classifyProgress.total > 0 && classifyProgress.done >= classifyProgress.total
+  )
+  useEffect(() => {
     if (!simplificationSupported) return
-    const eager = allTrials.slice(0, EAGER_BATCH_SIZE)
-    for (const t of eager) simplifier.enqueueSummarize(t, { outputLanguage })
-    if (extractedFields) {
-      for (const t of eager) simplifier.enqueueAssessFit(t, { outputLanguage })
-    }
+    if (!selected) return
+    if (!classifyDone) return
+    simplifier.enqueueSummarize(selected, { outputLanguage })
+    // assess_fit ("Why this might or might not fit you") intentionally not
+    // enqueued — Gemma 2B's accuracy on the fit narrative isn't reliable
+    // enough to ship (it occasionally flips disease stage / treatment
+    // history). The classifier's binary verdict + dot is the safer signal.
+    // The assess_fit pipeline itself stays in useSimplifier in case we
+    // re-enable it on a fine-tuned model later.
     // eslint-disable-next-line react-hooks/exhaustive-deps
-  }, [eagerKey, simplificationSupported, outputLanguage])
+  }, [selected?.nctId, simplificationSupported, outputLanguage, classifyDone])
 
   if (isLoading) {
     return (
@@ -156,19 +290,37 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
   function handleRequestSimplify(trial) {
     if (!simplificationSupported) return
     simplifier.enqueueSummarize(trial, { outputLanguage })
-    if (extractedFields) simplifier.enqueueAssessFit(trial, { outputLanguage })
+    // assess_fit deliberately omitted — see selected-trial effect above.
   }
 
   function renderDetail(trial) {
+    // Tell ResultCard which pipeline stage is in flight so it can render
+    // an explicit progress caption above the empty content area instead of
+    // showing the trial's raw summary (which can look like the model
+    // already replied with the wrong text).
+    let pipelineStage = null
+    const sim = simplifier.states.get(trial.nctId)
+    const simStatus = sim?.summarize?.status
+    if (canClassify && !classifyDone) {
+      pipelineStage = 'classifying'
+    } else if (
+      simplificationSupported &&
+      (!simStatus || simStatus === 'queued') &&
+      classifyDone
+    ) {
+      pipelineStage = 'awaiting-summary'
+    }
     return (
       <ResultCard
         trial={trial}
         coords={coords ?? null}
-        simplification={simplifier.states.get(trial.nctId)}
+        simplification={sim}
         onRequestSimplify={simplificationSupported ? handleRequestSimplify : null}
         inputLanguage={inputLanguage}
         simplificationSupported={simplificationSupported}
         pane
+        pipelineStage={pipelineStage}
+        classifyProgress={canClassify ? classifyProgress : null}
       />
     )
   }
@@ -184,6 +336,7 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
       <ResultsToolbar
         totalCount={totalCount}
         searchParams={searchParams}
+        classifyProgress={canClassify ? classifyProgress : null}
       />
 
       <div
@@ -204,6 +357,8 @@ export default function ResultsList({ searchParams, modelKey, userDescription, e
                   comparing={compareSet.has(trial.nctId)}
                   onToggleCompare={toggleCompare}
                   compareDisabled={compareSet.size >= 3}
+                  classification={canClassify ? classifications.get(trial.nctId) : null}
+                  classifyPending={canClassify && !classifications.has(trial.nctId)}
                 />
               </li>
             ))}
@@ -262,16 +417,14 @@ const PHASE_LABELS = {
   PHASE4: 'Phase 4',
 }
 
-const SORT_OPTIONS = [
-  { id: 'fit',      label: 'Best fit',     disabled: true, title: 'Available once on-device classification runs' },
-  { id: 'distance', label: 'Distance',     disabled: true, title: 'Sort wiring coming in a follow-up' },
-  { id: 'phase',    label: 'Phase',        disabled: true, title: 'Sort wiring coming in a follow-up' },
-  { id: 'recent',   label: 'Most recent',  disabled: true, title: 'Sort wiring coming in a follow-up' },
-]
-
-function ResultsToolbar({ totalCount, searchParams }) {
-  const [sort, setSort] = useState('recent')
+// Sort UI removed — the chips were visible-but-disabled placeholders for
+// "Best fit" / "Distance" / "Phase" / "Most recent" which read as broken
+// to users. When sort wiring lands (CT.gov API supports `sort=` for
+// distance and last-update; "Best fit" needs the classifier verdicts
+// per-trial), restore from git history at 67d5fc8 and wire onClick →
+// re-fetch through useClinicalTrials with the new sort token.
 
+function ResultsToolbar({ totalCount, searchParams, classifyProgress }) {
   const summaryParts = [`${totalCount.toLocaleString()} trial${totalCount !== 1 ? 's' : ''}`]
   if (searchParams.location) summaryParts.push(`near ${searchParams.location}`)
   if (searchParams.location && searchParams.radius) summaryParts.push(`within ${searchParams.radius} mi`)
@@ -282,7 +435,7 @@ function ResultsToolbar({ totalCount, searchParams }) {
   }
 
   return (
-    <div className="px-4 sm:px-6 py-3 border-b border-parchment-200 flex flex-wrap items-center justify-between gap-x-6 gap-y-2">
+    <div className="px-4 sm:px-6 py-3 border-b border-parchment-200 flex flex-wrap items-center gap-x-6 gap-y-2">
       <p className="font-mono text-[11px] text-parchment-700 leading-snug">
         {summaryParts.map((part, i) => (
           <span key={i}>
@@ -290,35 +443,15 @@ function ResultsToolbar({ totalCount, searchParams }) {
             {part}
           </span>
         ))}
+        {classifyProgress && classifyProgress.total > 0 && (
+          <span className="ml-3 text-iris-700">
+            <span className="text-parchment-300 mr-1.5" aria-hidden="true">·</span>
+            {classifyProgress.done < classifyProgress.total
+              ? `evaluating fit · ${classifyProgress.done} of ${classifyProgress.total}`
+              : `fit evaluated for ${classifyProgress.total}`}
+          </span>
+        )}
       </p>
-      <div className="hidden sm:flex items-center gap-1" role="group" aria-label="Sort results">
-        <span className="font-mono text-[10px] uppercase tracking-[0.08em] text-parchment-700 mr-2">
-          sort
-        </span>
-        {SORT_OPTIONS.map(opt => {
-          const active = sort === opt.id
-          return (
-            <button
-              key={opt.id}
-              type="button"
-              onClick={() => !opt.disabled && setSort(opt.id)}
-              disabled={opt.disabled}
-              title={opt.title}
-              {...(opt.disabled ? {} : { 'aria-pressed': active })}
-              className={[
-                'text-[11px] px-2 py-0.5 rounded-md transition-colors',
-                opt.disabled
-                  ? 'text-parchment-500 cursor-not-allowed'
-                  : active
-                    ? 'bg-iris-50 text-iris-700 font-medium'
-                    : 'text-parchment-700 hover:text-parchment-950 hover:bg-parchment-100',
-              ].join(' ')}
-            >
-              {opt.label}
-            </button>
-          )
-        })}
-      </div>
     </div>
   )
 }
diff --git a/src/components/TriageRow.jsx b/src/components/TriageRow.jsx
index da37199..e7d183d 100644
--- a/src/components/TriageRow.jsx
+++ b/src/components/TriageRow.jsx
@@ -1,5 +1,41 @@
 import { nearestLocation } from '../utils/apiHelpers'
 
+function FitDot({ classification, pending }) {
+  // Row-level fit indicator: shimmer while pending, filled/hollow dot once classified.
+  if (classification?.verdict === 'PARSE_FAIL') return null
+
+  if (pending && !classification) {
+    return (
+      <span
+        role="img"
+        className="iris-shimmer-text inline-block w-2 h-2 rounded-full mr-1"
+        title="Evaluating fit…"
+        aria-label="Evaluating fit"
+      >&nbsp;</span>
+    )
+  }
+  if (!classification) return null
+
+  const isLikely = classification.verdict === 'LIKELY'
+  // The same verdict + reason string goes into title (hover) and aria-label
+  // (assistive tech); title alone is not reliably announced. Both spans use
+  // role="img" because aria-label is not permitted on a role-less <span>.
+  const label = isLikely
+    ? `Likely fit — ${classification.reason || 'matches your description'}`
+    : `Less likely fit — ${classification.reason || 'may not match'}`
+  return (
+    <span
+      role="img"
+      className={[
+        'inline-block w-2 h-2 rounded-full mr-1 shrink-0',
+        isLikely ? 'bg-iris-500' : 'border border-parchment-400',
+      ].join(' ')}
+      title={label}
+      aria-label={label}
+    />
+  )
+}
+
 const PHASE_SHORT = {
   EARLY_PHASE1: 'Early Phase 1',
   PHASE1: 'Phase 1',
@@ -22,6 +58,8 @@ export default function TriageRow({
   comparing = false,
   onToggleCompare,
   compareDisabled = false,
+  classification = null,
+  classifyPending = false,
 }) {
   const nearest = nearestLocation(trial.locations, coords)
   const phase = formatPhase(trial.phases)
@@ -51,7 +89,8 @@ export default function TriageRow({
         >
           {trial.title}
         </h3>
-        <span className="font-mono text-[11px] text-parchment-700 flex flex-wrap gap-x-1.5">
+        <span className="font-mono text-[11px] text-parchment-700 flex flex-wrap items-center gap-x-1.5">
+          <FitDot classification={classification} pending={classifyPending} />
           {nearest?.distanceMi != null && <span>{nearest.distanceMi} mi</span>}
           {nearest?.distanceMi != null && phase && <span aria-hidden="true">·</span>}
           {phase && <span>{phase}</span>}
diff --git a/src/hooks/useClassifier.js b/src/hooks/useClassifier.js
new file mode 100644
index 0000000..69042dd
--- /dev/null
+++ b/src/hooks/useClassifier.js
@@ -0,0 +1,83 @@
+import { useRef, useEffect, useCallback } from 'react'
+import { getSharedWorker, attachListener } from '../workers/sharedNlpWorker'
+
+// Two task hooks (classifyOne, translateOne) share a single promise chain
+// because WebLLM's MLCEngine is NOT parallel-safe. Concurrent
+// engine.chat.completions.create() calls clobber state and produce
+// "Message error should not be 0" failures. Callers can fire-and-forget
+// concurrently; each request waits its turn behind the chain.
+//
+// The two task types are functionally similar (one-shot completion with
+// raw + latencyMs return) but conceptually distinct, so they get distinct
+// worker message types ('classify' vs 'translate') for clarity and so the
+// worker can use different max_tokens budgets.
+//
+// The worker must already have the model loaded. classify/translateOne
+// reject with 'Engine not loaded' otherwise.
+export function useClassifier() {
+  const pendingRef = useRef(new Map())
+  const detachRef = useRef(null)
+  const taskIdRef = useRef(0)
+  const chainRef = useRef(Promise.resolve())
+  const disposedRef = useRef(false) // true between effect cleanup and re-mount
+
+  function ensureSubscribed() {
+    if (detachRef.current) return
+    detachRef.current = attachListener(handleMessage)
+  }
+
+  function handleMessage(event) {
+    const { type, taskId, raw, latencyMs, message } = event.data ?? {}
+    const isDone = type === 'classify_done' || type === 'translate_done'
+    const isError = type === 'classify_error' || type === 'translate_error'
+    if (!isDone && !isError) return
+    const pending = pendingRef.current.get(taskId)
+    if (!pending) return
+    pendingRef.current.delete(taskId)
+    if (isDone) pending.resolve({ raw, latencyMs })
+    else pending.reject(new Error(message ?? 'task failed'))
+  }
+
+  useEffect(() => {
+    const pending = pendingRef.current
+    disposedRef.current = false
+    return () => {
+      disposedRef.current = true
+      detachRef.current?.()
+      detachRef.current = null
+      // Reject every in-flight task so awaiting callers don't hang when the
+      // component unmounts mid-batch (or during a StrictMode double-invoke).
+      for (const { reject } of pending.values()) {
+        reject(new Error('classifier unmounted'))
+      }
+      pending.clear()
+    }
+  }, [])
+
+  // Generic task runner: calls are appended to one promise chain so only one
+  // request is posted at a time; taskIdPrefix only labels worker logs.
+  function runTask(workerType, taskIdPrefix, prompt) {
+    const taskId = `${taskIdPrefix}-${++taskIdRef.current}`
+    const next = chainRef.current.catch(() => {}).then(() =>
+      new Promise((resolve, reject) => {
+        // A task reaching the head of the queue after unmount has no
+        // listener left to settle it: fail it here instead of posting.
+        if (disposedRef.current) return reject(new Error('classifier unmounted'))
+        ensureSubscribed()
+        pendingRef.current.set(taskId, { resolve, reject })
+        getSharedWorker().postMessage({ type: workerType, taskId, prompt })
+      })
+    )
+    chainRef.current = next
+    return next
+  }
+
+  // runTask only closes over refs, which are stable across renders, so the
+  // empty dep lists are safe even though the lint rule cannot verify that.
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  const classifyOne = useCallback((prompt) => runTask('classify', 'classify', prompt), [])
+  // eslint-disable-next-line react-hooks/exhaustive-deps
+  const translateOne = useCallback((prompt) => runTask('translate', 'translate', prompt), [])
+
+  return { classifyOne, translateOne }
+}
diff --git a/src/hooks/useClassifier.test.js b/src/hooks/useClassifier.test.js
new file mode 100644
index 0000000..61f0c85
--- /dev/null
+++ b/src/hooks/useClassifier.test.js
@@ -0,0 +1,94 @@
+import { describe, it, expect, vi, beforeEach } from 'vitest'
+import { renderHook, act, waitFor } from '@testing-library/react'
+import { useClassifier } from './useClassifier'
+
+// Mock the shared worker so tests don't touch the real WebLLM worker.
+// We intercept postMessage to capture call order, and we expose a way for
+// the test to invoke the listener with synthetic 'classify_done' messages.
+let capturedListener = null
+let postedMessages = []
+
+vi.mock('../workers/sharedNlpWorker', () => ({
+  getSharedWorker: () => ({
+    postMessage: (msg) => { postedMessages.push(msg) },
+  }),
+  attachListener: (fn) => {
+    capturedListener = fn
+    return () => { capturedListener = null }
+  },
+}))
+
+beforeEach(() => {
+  capturedListener = null
+  postedMessages = []
+})
+
+// Helper: construct a 'classify_done' worker message and pass it to whatever
+// useClassifier registered as its listener.
+function dispatchDone(taskId, raw = 'LIKELY | mock', latencyMs = 100) {
+  capturedListener({ data: { type: 'classify_done', taskId, raw, latencyMs } })
+}
+
+describe('useClassifier — promise chain serialization', () => {
+  it('posts only the first request to the worker until it settles', async () => {
+    const { result } = renderHook(() => useClassifier())
+
+    // Fire 3 concurrent classifyOne calls.
+    let p1, p2, p3
+    p1 = result.current.classifyOne('prompt-1')
+    p2 = result.current.classifyOne('prompt-2')
+    p3 = result.current.classifyOne('prompt-3')
+
+    // Only the first task should be in flight.
+    await waitFor(() => expect(postedMessages.length).toBe(1))
+    expect(postedMessages[0].prompt).toBe('prompt-1')
+
+    // Settle task 1; task 2 should now post.
+    dispatchDone(postedMessages[0].taskId, 'LIKELY | one')
+    await p1
+    await waitFor(() => expect(postedMessages.length).toBe(2))
+    expect(postedMessages[1].prompt).toBe('prompt-2')
+
+    // Settle task 2; task 3 posts.
+    dispatchDone(postedMessages[1].taskId, 'UNLIKELY | two')
+    await p2
+    await waitFor(() => expect(postedMessages.length).toBe(3))
+    expect(postedMessages[2].prompt).toBe('prompt-3')
+
+    // Settle task 3.
+    dispatchDone(postedMessages[2].taskId, 'LIKELY | three')
+    const r3 = await p3
+    expect(r3.raw).toBe('LIKELY | three')
+  })
+
+  it('does not poison the queue when one task rejects', async () => {
+    const { result } = renderHook(() => useClassifier())
+
+    const p1 = result.current.classifyOne('prompt-A')
+    const p2 = result.current.classifyOne('prompt-B')
+
+    await waitFor(() => expect(postedMessages.length).toBe(1))
+
+    // Reject task 1 via classify_error.
+    capturedListener({ data: { type: 'classify_error', taskId: postedMessages[0].taskId, message: 'boom' } })
+    await expect(p1).rejects.toThrow('boom')
+
+    // Task 2 should still post and resolve.
+    await waitFor(() => expect(postedMessages.length).toBe(2))
+    dispatchDone(postedMessages[1].taskId, 'LIKELY | recovered')
+    const r2 = await p2
+    expect(r2.raw).toBe('LIKELY | recovered')
+  })
+
+  it('rejects pending tasks when the hook unmounts', async () => {
+    const { result, unmount } = renderHook(() => useClassifier())
+
+    const p1 = result.current.classifyOne('prompt-pending')
+    await waitFor(() => expect(postedMessages.length).toBe(1))
+
+    // Mid-flight: unmount.
+    act(() => unmount())
+
+    await expect(p1).rejects.toThrow(/unmounted/)
+  })
+})
diff --git a/src/hooks/useIsMobile.js b/src/hooks/useIsMobile.js
new file mode 100644
index 0000000..a5fb4ee
--- /dev/null
+++ b/src/hooks/useIsMobile.js
@@ -0,0 +1,21 @@
+import { useEffect, useState } from 'react'
+
+export const MOBILE_BREAKPOINT_PX = 820
+
+// matchMedia (not 'resize'): iOS Safari fires 'resize' inconsistently on
+// rotation; matchMedia.change is the reliable signal. Also catches iPad
+// split-screen and browser-window mode switches without a manual resize.
+export function useIsMobile() {
+  const query = `(max-width: ${MOBILE_BREAKPOINT_PX}px)`
+  const [isMobile, setIsMobile] = useState(() =>
+    typeof window !== 'undefined' && window.matchMedia(query).matches)
+  useEffect(() => {
+    const mq = window.matchMedia(query)
+    const onChange = (e) => setIsMobile(e.matches)
+    // Safari < 14 only implements the deprecated addListener/removeListener.
+    if (!mq.addEventListener) { mq.addListener(onChange); return () => mq.removeListener(onChange) }
+    mq.addEventListener('change', onChange)
+    return () => mq.removeEventListener('change', onChange)
+  }, [query])
+  return isMobile
+}
diff --git a/src/test/setup.js b/src/test/setup.js
index 7c891de..d8865f2 100644
--- a/src/test/setup.js
+++ b/src/test/setup.js
@@ -1,7 +1,23 @@
 import '@testing-library/jest-dom'
-import { afterEach } from 'vitest'
+import { afterEach, vi } from 'vitest'
 import { cleanup } from '@testing-library/react'
 
+// jsdom doesn't ship matchMedia; ResultsList uses it for the mobile
+// breakpoint detector. Stub it to "desktop" (does-not-match) by default
+// so the two-pane code path renders in tests.
+if (typeof window !== 'undefined' && !window.matchMedia) {
+  window.matchMedia = vi.fn().mockImplementation((query) => ({
+    matches: false,
+    media: query,
+    onchange: null,
+    addEventListener: vi.fn(),
+    removeEventListener: vi.fn(),
+    addListener: vi.fn(),       // legacy
+    removeListener: vi.fn(),    // legacy
+    dispatchEvent: vi.fn(),
+  }))
+}
+
 afterEach(() => {
   cleanup()
 })
diff --git a/src/utils/classifyTrial.js b/src/utils/classifyTrial.js
new file mode 100644
index 0000000..bb9b86d
--- /dev/null
+++ b/src/utils/classifyTrial.js
@@ -0,0 +1,77 @@
+// Stage-1 classification prompt + verdict parser.
+// Shared between the harness (?test=classify) and the in-app
+// classifyAll flow in ResultsList so the model sees the same prompt
+// in both contexts. Keep the wording in sync with the validated
+// harness baseline (Qwen2.5-1.5B → ~83% binary agreement, 0
+// catastrophic UNLIKELY).
+
+export const DEFAULT_CLASSIFY_PROMPT = `You decide whether a clinical trial is worth showing to a patient. Output one of two labels:
+
+- LIKELY: the trial studies the patient's condition AND nothing in the eligibility clearly excludes the patient based on what they stated. Worth showing.
+- UNLIKELY: the trial studies a different disease, OR the patient is clearly the wrong sex / age / population. Not worth showing.
+
+Be inclusive on LIKELY: if the trial requires a subtype, biomarker, stage, or prior treatment the patient did NOT mention, still call it LIKELY — the patient or their doctor can verify. Only use UNLIKELY when the patient is clearly disqualified by something they DID state.
+
+Examples (note: each example uses a DIFFERENT patient — focus on the reasoning, not the patient details):
+
+Patient: "45-year-old woman with ovarian cancer"
+Trial: PARP Inhibitor in BRCA-Mutated Ovarian Cancer (Eligibility: women with ovarian cancer and BRCA mutation)
+Answer: LIKELY | matches ovarian cancer in a woman; BRCA status can be verified
+
+Patient: "70-year-old man with type 2 diabetes"
+Trial: Tamoxifen in Premenopausal Breast Cancer (Eligibility: premenopausal women with breast cancer)
+Answer: UNLIKELY | trial is for breast cancer in women; patient has diabetes
+
+Patient: "8-year-old child with asthma"
+Trial: Adult Anti-Inflammatory for Asthma (Eligibility: adults 18+ with persistent asthma)
+Answer: UNLIKELY | trial is for adults; patient is a child
+
+Patient: "55-year-old man with hypertension"
+Trial: Yoga Intervention for Adults with Chronic Conditions (Eligibility: adults 40-75 with any chronic condition)
+Answer: LIKELY | adult with chronic condition matches the broad inclusion
+
+Now classify:
+
+Patient: {{user}}
+Trial: {{title}}
+Eligibility: {{eligibility}}
+
+Answer (one line, format exactly "<LABEL> | <one short reason>"):`
+
+export const ELIG_MAX_CHARS = 1500
+
+export function buildClassifyPrompt(userDesc, trial, eligMax = ELIG_MAX_CHARS) {
+  // Fills the {{user}} / {{title}} / {{eligibility}} slots of the prompt.
+  // Eligibility: reuse the trial.eligibility string when present, else the
+  // structured eligibility.criteria field; capped at eligMax characters.
+  const rawElig = typeof trial.eligibility === 'string'
+    ? trial.eligibility
+    : trial.eligibility?.criteria ?? ''
+  const title = trial.title || trial.briefTitle || ''
+  const values = { user: userDesc ?? '', title, eligibility: rawElig.slice(0, eligMax) }
+  // Single pass with a function replacer: "$&" / "$'" / "$$" sequences in
+  // free text stay literal (a string replacement would interpret them), and
+  // placeholder-looking text inside one value is never expanded by another.
+  return DEFAULT_CLASSIFY_PROMPT.replace(/\{\{(user|title|eligibility)\}\}/g, (_, key) => values[key])
+}
+
+// Parser still accepts POSSIBLE in case the model emits it (older prompts,
+// instruction drift) — POSSIBLE is normalized to LIKELY since the binary
+// product question is "show or hide".
+export function parseVerdict(raw) {
+  if (typeof raw !== 'string' || raw === '') {
+    return { verdict: 'PARSE_FAIL', reason: '(empty output)' }
+  }
+  // POSSIBLE collapses into LIKELY; the other labels pass through upper-cased.
+  const toBinary = (label) => (label.toUpperCase() === 'POSSIBLE' ? 'LIKELY' : label.toUpperCase())
+  // Preferred shape: a line of "<LABEL> <separator> <reason>".
+  const strict = /^\s*(LIKELY|POSSIBLE|UNLIKELY)\s*[|:\-—]\s*(.+?)\s*$/im.exec(raw)
+  if (strict !== null) return { verdict: toBinary(strict[1]), reason: strict[2].trim() }
+
+  // Fallback: first bare label anywhere; whatever text remains is the reason.
+  const loose = /\b(LIKELY|POSSIBLE|UNLIKELY)\b/i.exec(raw)
+  if (loose === null) return { verdict: 'PARSE_FAIL', reason: raw.slice(0, 120) }
+  const [matched, label] = loose
+  const leftover = raw.replace(matched, '').replace(/^[\s|:\-—]+/, '').trim()
+  return { verdict: toBinary(label), reason: leftover || '(no reason)' }
+}
diff --git a/src/utils/nlpModels.js b/src/utils/nlpModels.js
index 67f9eee..17e286a 100644
--- a/src/utils/nlpModels.js
+++ b/src/utils/nlpModels.js
@@ -9,6 +9,14 @@ export const NLP_MODELS = {
     sizeLabel: '~1.3 GB',
     isThinking: false,
   },
+  // Gemma 2 2B q4f16_1 was tried as a faster alternative (~30% lower
+  // latency from native-fp16 WebGPU compute) but the lower activation
+  // precision degraded the simplifier's structured output — section
+  // headers ("## What this study is testing" / "## Who can join") came
+  // out malformed, and Gemma can only have ONE quant loaded at a time
+  // so we can't mix q4f16_1 for classification and q4f32_1 for
+  // simplification. q4f32_1 stays as the sole Gemma 2 2B variant.
+
   qwen3: {
     id: 'Qwen3-1.7B-q4f32_1-MLC',
     label: 'Qwen3 1.7B',
@@ -17,6 +25,20 @@ export const NLP_MODELS = {
     // <think>…</think> block before the answer, which breaks JSON parsing.
     isThinking: true,
   },
+  llama32: {
+    id: 'Llama-3.2-3B-Instruct-q4f16_1-MLC',
+    label: 'Llama 3.2 3B',
+    // q4f16_1 instead of q4f32_1: smaller (~1.9 GB vs ~2.4 GB) and faster on
+    // most GPUs with effectively no quality difference for instruction tasks.
+    sizeLabel: '~1.9 GB',
+    isThinking: false,
+  },
+  qwen25: {
+    id: 'Qwen2.5-1.5B-Instruct-q4f16_1-MLC',
+    label: 'Qwen2.5 1.5B',
+    sizeLabel: '~900 MB',
+    isThinking: false,
+  },
 }
 
 export const DEFAULT_MODEL_KEY = 'gemma'
diff --git a/src/workers/nlp.worker.js b/src/workers/nlp.worker.js
index 259237b..d541141 100644
--- a/src/workers/nlp.worker.js
+++ b/src/workers/nlp.worker.js
@@ -30,18 +30,40 @@ self.onmessage = async (event) => {
     loading = true
     isThinkingModel = Boolean(isThinking)
     try {
-      const { CreateMLCEngine } = await import('@mlc-ai/web-llm')
+      const { CreateMLCEngine /* , prebuiltAppConfig */ } = await import('@mlc-ai/web-llm')
       // CreateMLCEngine signature: (modelId, engineConfig, chatOpts).
       // chatOpts is per-model config override (e.g. sliding_window_size:-1
       // for gemma3, whose prebuilt record sets context_window_size:4096
       // alongside sliding_window_size:512 — the engine rejects both being
       // positive).
+      //
+      // ─── Custom model wiring (stub) ──────────────────────────────────
+      // To serve a fine-tuned model (e.g. a domain-specific LoRA merged
+      // back into Qwen2.5-1.5B), uncomment the appConfig block below and
+      // add a matching entry to nlpModels.js with model_id matching the
+      // one here. The model and model_lib URLs must be CORS-accessible —
+      // HuggingFace Hub serves MLC artifacts with the right headers; a
+      // self-hosted bucket needs explicit CORS config. The end-to-end
+      // LoRA → MLC → WebLLM pipeline is described in the author's notes at the
+      // machine-local path ~/Documents/Github/sevry_vault/Work/ClaudeCode/iris/lora-training-process.md
+      //
+      // const appConfig = {
+      //   model_list: [
+      //     ...prebuiltAppConfig.model_list,
+      //     {
+      //       model: 'https://huggingface.co/USER/iris-classifier-q4f16_1-MLC/resolve/main/',
+      //       model_id: 'iris-classifier-q4f16_1-MLC',
+      //       model_lib: 'https://huggingface.co/USER/iris-classifier-q4f16_1-MLC/resolve/main/iris-classifier-q4f16_1-ctx4k_cs1k-webgpu.wasm',
+      //     },
+      //   ],
+      // }
       engine = await CreateMLCEngine(
         modelId ?? DEFAULT_MODEL_ID,
         {
           initProgressCallback: (progress) => {
             self.postMessage({ type: 'progress', progress })
           },
+          // appConfig, // ← uncomment alongside the block above
         },
         chatOpts ?? undefined,
       )
@@ -108,6 +130,63 @@ self.onmessage = async (event) => {
     return
   }
 
+  if (type === 'translate') {
+    if (!engine) {
+      self.postMessage({ type: 'translate_error', taskId, message: 'Engine not loaded' })
+      return
+    }
+    try {
+      const t0 = Date.now()
+      if (typeof engine.resetChat === 'function') {
+        try { await engine.resetChat() } catch { /* best effort */ }
+      }
+      // Translation typically needs more headroom than classification (one
+      // verdict word + reason fits in 80; a paraphrased clinical sentence
+      // can run 100-200 tokens for verbose languages). Same low temperature
+      // since we want fidelity, not creativity.
+      const request = {
+        messages: [{ role: 'user', content: prompt }],
+        max_tokens: 200,
+        temperature: 0.1,
+      }
+      if (isThinkingModel) request.extra_body = { enable_thinking: false }
+      const reply = await engine.chat.completions.create(request)
+      const raw = reply.choices?.[0]?.message?.content ?? ''
+      self.postMessage({ type: 'translate_done', taskId, raw, latencyMs: Date.now() - t0 })
+    } catch (err) {
+      self.postMessage({ type: 'translate_error', taskId, message: err?.message ?? String(err) })
+    }
+    return
+  }
+
+  if (type === 'classify') {
+    if (!engine) {
+      self.postMessage({ type: 'classify_error', taskId, message: 'Engine not loaded' })
+      return
+    }
+    try {
+      const t0 = Date.now()
+      // Reset KV cache between classifications so they're independent.
+      if (typeof engine.resetChat === 'function') {
+        try { await engine.resetChat() } catch { /* best effort */ }
+      }
+      const request = {
+        messages: [{ role: 'user', content: prompt }],
+        // Stage-1 verdict + one-sentence reason fits comfortably in ~60 tokens.
+        // Generous headroom (80) covers preamble drift from the smaller models.
+        max_tokens: 80,
+        temperature: 0.1,
+      }
+      if (isThinkingModel) request.extra_body = { enable_thinking: false }
+      const reply = await engine.chat.completions.create(request)
+      const raw = reply.choices?.[0]?.message?.content ?? ''
+      self.postMessage({ type: 'classify_done', taskId, raw, latencyMs: Date.now() - t0 })
+    } catch (err) {
+      self.postMessage({ type: 'classify_error', taskId, message: err?.message ?? String(err) })
+    }
+    return
+  }
+
   if (type === 'summarize' || type === 'assess_fit') {
     if (!engine) {
       self.postMessage({ type: 'task_error', taskId, message: 'Engine not loaded' })
@@ -134,7 +213,13 @@ self.onmessage = async (event) => {
         // its hedging language ("may", "might") doesn't collapse into a
         // single deterministic phrase across trials.
         temperature: type === 'summarize' ? 0.1 : 0.2,
-        frequency_penalty: type === 'summarize' ? 0.3 : 0,
+        // Bumped from 0.3 → 0.6 because Gemma 2 2B was hitting degenerate
+        // loops on the simplify prompt — emitting strings of "##" header
+        // markers ("############# ## ## ## …") instead of the body content.
+        // A higher frequency penalty makes each already-emitted token less
+        // likely to be sampled again, breaking the loop. Assess-fit stays at 0
+        // so its hedging language ("may", "might") doesn't get penalized.
+        frequency_penalty: type === 'summarize' ? 0.6 : 0,
         stream: true,
       }
       if (isThinkingModel) request.extra_body = { enable_thinking: false }