From bc46c72d2c11490ca5cb37338e917c0c7218cc46 Mon Sep 17 00:00:00 2001
From: Nadir <info@getnadir.com>
Date: Fri, 29 May 2026 09:08:47 -0400
Subject: [PATCH] v0.19.2: Fix TrainedVerifier input format to match production

NadirClaw's TrainedVerifier was passing the cheap answer as the bare
text_pair to the tokenizer. The model was trained on a structured format
with CHEAP:/EXPENSIVE: markers, matching what the Pro production backend
uses. Without that wrapper, scores are miscalibrated against the
production tau=0.80 threshold.

This patch wraps the input in the production format:

  text_pair = f"CHEAP:\n{cheap}\n\nEXPENSIVE:\n{(reference_answer or '').strip()}"

reference_answer is now used when provided (it was previously documented
as ignored). With reference_answer=None an empty string is substituted,
matching production; a whitespace-only reference_answer is stripped and
likewise yields an empty EXPENSIVE: block.

Aligns NadirClaw with:
- https://huggingface.co/nadirclaw/cascade-verifier-v1 (model card)
- getnadir.dev/backend/app/services/verifier_model.py (production)

Repo: https://github.com/NadirRouter/NadirClaw
Service: https://getnadir.com
---
 nadirclaw/__init__.py          |  2 +-
 nadirclaw/trained_verifier.py  | 33 ++++++++++----
 tests/test_trained_verifier.py | 79 ++++++++++++++++++++++++++++++++++
 3 files changed, 105 insertions(+), 9 deletions(-)

diff --git a/nadirclaw/__init__.py b/nadirclaw/__init__.py
index 9a92fda..f887770 100644
--- a/nadirclaw/__init__.py
+++ b/nadirclaw/__init__.py
@@ -1,3 +1,3 @@
 """NadirClaw — Open-source LLM router."""
 
-__version__ = "0.19.1"
+__version__ = "0.19.2"
diff --git a/nadirclaw/trained_verifier.py b/nadirclaw/trained_verifier.py
index 4c83a12..644c205 100644
--- a/nadirclaw/trained_verifier.py
+++ b/nadirclaw/trained_verifier.py
@@ -32,9 +32,12 @@
     >>> result = v.score(prompt, cheap_answer)
     >>> result.score, result.accepted        # float in [0, 1], bool
 
-The ``reference_answer`` and ``expect_json`` arguments are accepted for
-parity with ``HeuristicVerifier`` but are currently ignored — the
-trained model scores ``(prompt, cheap_answer)`` only.
+The ``reference_answer`` argument, when provided, is folded into the
+structured ``text_pair`` the cross-encoder was trained on (see
+``score()``). When ``None``, an empty ``EXPENSIVE:`` block is
+substituted, matching the production backend's behaviour. The
+``expect_json`` argument is accepted for parity with
+``HeuristicVerifier`` but is currently ignored by the trained model.
 
 Dependencies
 ------------
@@ -219,10 +222,16 @@ def score(
     ) -> TrainedScore:
         """Score how acceptable ``cheap_answer`` is for ``prompt``.
 
-        ``reference_answer`` and ``expect_json`` are accepted for
-        interface parity with ``HeuristicVerifier`` and are currently
-        ignored by the trained model. The cross-encoder was trained
-        on ``(prompt, cheap_answer)`` pairs only.
+        The cross-encoder was trained on inputs of the form
+        ``(prompt, "CHEAP:\\n{cheap}\\n\\nEXPENSIVE:\\n{reference}")``.
+        ``reference_answer`` is folded into the structured ``text_pair``
+        when provided; when ``None`` an empty ``EXPENSIVE:`` block is
+        substituted, matching the production backend at
+        ``getnadir.dev/backend/app/services/verifier_model.py``.
+
+        ``expect_json`` is accepted for interface parity with
+        ``HeuristicVerifier`` and is currently ignored by the trained
+        model.
         """
         self._ensure_loaded()
 
@@ -240,9 +249,17 @@ def score(
 
         import torch
 
+        # Match the training format used by the production backend
+        # (getnadir.dev/backend/app/services/verifier_model.py) and
+        # documented on the HuggingFace model card. Without the
+        # ``CHEAP:``/``EXPENSIVE:`` wrapper the scores drift against
+        # the calibrated tau=0.80 acceptance threshold.
+        text_pair = (
+            f"CHEAP:\n{cheap}\n\nEXPENSIVE:\n{(reference_answer or '').strip()}"
+        )
         enc = self._tokenizer(
             prompt or "",
-            cheap,
+            text_pair,
             truncation=True,
             max_length=_MAX_SEQ_LEN,
             padding=False,
diff --git a/tests/test_trained_verifier.py b/tests/test_trained_verifier.py
index 606d0db..3c3c7da 100644
--- a/tests/test_trained_verifier.py
+++ b/tests/test_trained_verifier.py
@@ -148,6 +148,85 @@ def test_trained_verifier_interface_matches_heuristic():
         assert {"score", "accepted", "threshold", "reasons", "verifier"} <= d.keys()
 
 
+def test_trained_verifier_wraps_input_in_production_format():
+    """The tokenizer must receive ``text_pair`` wrapped in the
+    ``CHEAP:\\n...\\n\\nEXPENSIVE:\\n...`` format the cross-encoder was
+    trained on. Without this wrapper, scores drift against the
+    calibrated tau=0.80 threshold.
+
+    Production reference:
+      ``getnadir.dev/backend/app/services/verifier_model.py:195``
+    """
+    from nadirclaw.trained_verifier import TrainedVerifier
+
+    captured: dict = {}
+
+    class _FakeEncoding(dict):
+        def __init__(self):
+            super().__init__()
+            # Minimal tensor-like values so the .to(device) loop works.
+            class _T:
+                def to(self, _device):
+                    return self
+
+            self["input_ids"] = _T()
+            self["attention_mask"] = _T()
+
+    class _FakeTokenizer:
+        def __call__(self, prompt, text_pair, **kwargs):
+            captured["prompt"] = prompt
+            captured["text_pair"] = text_pair
+            captured["kwargs"] = kwargs
+            return _FakeEncoding()
+
+    class _FakeLogits:
+        # Two-class head; softmax([0, 0]) => probs[..., 1] == 0.5
+        shape = (1, 2)
+
+        def __init__(self):
+            import torch
+            self._t = torch.tensor([[0.0, 0.0]])
+
+        def __getattr__(self, name):
+            return getattr(self._t, name)
+
+    class _FakeModelOut:
+        def __init__(self):
+            import torch
+            self.logits = torch.tensor([[0.0, 0.0]])
+
+    class _FakeModel:
+        def __call__(self, **kwargs):
+            return _FakeModelOut()
+
+        def eval(self):
+            return self
+
+        def to(self, _device):
+            return self
+
+    v = TrainedVerifier(threshold=0.8, device="cpu")
+    v._tokenizer = _FakeTokenizer()
+    v._model = _FakeModel()
+    v._resolved_device = "cpu"
+
+    # Case 1: reference_answer provided.
+    out = v.score("What is 2+2?", "4", reference_answer="four")
+    assert captured["prompt"] == "What is 2+2?"
+    assert captured["text_pair"] == "CHEAP:\n4\n\nEXPENSIVE:\nfour"
+    assert 0.0 <= out.score <= 1.0
+
+    # Case 2: reference_answer=None -> empty EXPENSIVE: block.
+    captured.clear()
+    v.score("What is 2+2?", "4")
+    assert captured["text_pair"] == "CHEAP:\n4\n\nEXPENSIVE:\n"
+
+    # Case 3: reference_answer is whitespace-only -> stripped to empty.
+    captured.clear()
+    v.score("What is 2+2?", "4", reference_answer="   \n  ")
+    assert captured["text_pair"] == "CHEAP:\n4\n\nEXPENSIVE:\n"
+
+
 def test_trained_verifier_get_singleton_caches():
     """The module-level singleton accessor should cache same-threshold calls
     and return fresh instances for mismatched thresholds. Construction