From bc46c72d2c11490ca5cb37338e917c0c7218cc46 Mon Sep 17 00:00:00 2001 From: Nadir Date: Fri, 29 May 2026 09:08:47 -0400 Subject: [PATCH] v0.19.2: Fix TrainedVerifier input format to match production NadirClaw's TrainedVerifier was passing the cheap answer as the bare text_pair to the tokenizer. The model was trained on a structured format with CHEAP:/EXPENSIVE: markers, matching what the Pro production backend uses. Without that wrapper, scores are miscalibrated against the production tau=0.80 threshold. This patch wraps the input in the production format: text_pair = f"CHEAP:\n{cheap}\n\nEXPENSIVE:\n{(reference_answer or '').strip()}" reference_answer is now used when provided (was previously documented as ignored). Behavior with reference_answer=None matches production: empty string substitution (a whitespace-only reference_answer is stripped to the same empty block). Aligns NadirClaw with: - https://huggingface.co/nadirclaw/cascade-verifier-v1 (model card) - getnadir.dev/backend/app/services/verifier_model.py (production) Repo: https://github.com/NadirRouter/NadirClaw Service: https://getnadir.com --- nadirclaw/__init__.py | 2 +- nadirclaw/trained_verifier.py | 33 ++++++++++---- tests/test_trained_verifier.py | 79 ++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+), 9 deletions(-) diff --git a/nadirclaw/__init__.py b/nadirclaw/__init__.py index 9a92fda..f887770 100644 --- a/nadirclaw/__init__.py +++ b/nadirclaw/__init__.py @@ -1,3 +1,3 @@ """NadirClaw — Open-source LLM router.""" -__version__ = "0.19.1" +__version__ = "0.19.2" diff --git a/nadirclaw/trained_verifier.py b/nadirclaw/trained_verifier.py index 4c83a12..644c205 100644 --- a/nadirclaw/trained_verifier.py +++ b/nadirclaw/trained_verifier.py @@ -32,9 +32,12 @@ >>> result = v.score(prompt, cheap_answer) >>> result.score, result.accepted # float in [0, 1], bool -The ``reference_answer`` and ``expect_json`` arguments are accepted for -parity with ``HeuristicVerifier`` but are currently ignored — the -trained model scores ``(prompt, cheap_answer)`` only. 
+The ``reference_answer`` argument, when provided, is folded into the +structured ``text_pair`` the cross-encoder was trained on (see +``score()``). When ``None``, an empty ``EXPENSIVE:`` block is +substituted, matching the production backend's behaviour. The +``expect_json`` argument is accepted for parity with +``HeuristicVerifier`` but is currently ignored by the trained model. Dependencies ------------ @@ -219,10 +222,16 @@ def score( ) -> TrainedScore: """Score how acceptable ``cheap_answer`` is for ``prompt``. - ``reference_answer`` and ``expect_json`` are accepted for - interface parity with ``HeuristicVerifier`` and are currently - ignored by the trained model. The cross-encoder was trained - on ``(prompt, cheap_answer)`` pairs only. + The cross-encoder was trained on inputs of the form + ``(prompt, "CHEAP:\\n{cheap}\\n\\nEXPENSIVE:\\n{reference}")``. + ``reference_answer`` is folded into the structured ``text_pair`` + when provided; when ``None`` an empty ``EXPENSIVE:`` block is + substituted, matching the production backend at + ``getnadir.dev/backend/app/services/verifier_model.py``. + + ``expect_json`` is accepted for interface parity with + ``HeuristicVerifier`` and is currently ignored by the trained + model. """ self._ensure_loaded() @@ -240,9 +249,17 @@ def score( import torch + # Match the training format used by the production backend + # (getnadir.dev/backend/app/services/verifier_model.py) and + # documented on the HuggingFace model card. Without the + # ``CHEAP:``/``EXPENSIVE:`` wrapper the scores drift against + # the calibrated tau=0.80 acceptance threshold. 
+ text_pair = ( + f"CHEAP:\n{cheap}\n\nEXPENSIVE:\n{(reference_answer or '').strip()}" + ) enc = self._tokenizer( prompt or "", - cheap, + text_pair, truncation=True, max_length=_MAX_SEQ_LEN, padding=False, diff --git a/tests/test_trained_verifier.py b/tests/test_trained_verifier.py index 606d0db..3c3c7da 100644 --- a/tests/test_trained_verifier.py +++ b/tests/test_trained_verifier.py @@ -148,6 +148,85 @@ def test_trained_verifier_interface_matches_heuristic(): assert {"score", "accepted", "threshold", "reasons", "verifier"} <= d.keys() +def test_trained_verifier_wraps_input_in_production_format(): + """The tokenizer must receive ``text_pair`` wrapped in the + ``CHEAP:\\n...\\n\\nEXPENSIVE:\\n...`` format the cross-encoder was + trained on. Without this wrapper, scores drift against the + calibrated tau=0.80 threshold. + + Production reference: + ``getnadir.dev/backend/app/services/verifier_model.py:195`` + """ + from nadirclaw.trained_verifier import TrainedVerifier + + captured: dict = {} + + class _FakeEncoding(dict): + def __init__(self): + super().__init__() + # Minimal tensor-like values so the .to(device) loop works. 
+ class _T: + def to(self, _device): + return self + + self["input_ids"] = _T() + self["attention_mask"] = _T() + + class _FakeTokenizer: + def __call__(self, prompt, text_pair, **kwargs): + captured["prompt"] = prompt + captured["text_pair"] = text_pair + captured["kwargs"] = kwargs + return _FakeEncoding() + + class _FakeLogits: + # Two-class head; softmax([0, 0]) => probs[..., 1] == 0.5 + shape = (1, 2) + + def __init__(self): + import torch + self._t = torch.tensor([[0.0, 0.0]]) + + def __getattr__(self, name): + return getattr(self._t, name) + + class _FakeModelOut: + def __init__(self): + import torch + self.logits = torch.tensor([[0.0, 0.0]]) + + class _FakeModel: + def __call__(self, **kwargs): + return _FakeModelOut() + + def eval(self): + return self + + def to(self, _device): + return self + + v = TrainedVerifier(threshold=0.8, device="cpu") + v._tokenizer = _FakeTokenizer() + v._model = _FakeModel() + v._resolved_device = "cpu" + + # Case 1: reference_answer provided. + out = v.score("What is 2+2?", "4", reference_answer="four") + assert captured["prompt"] == "What is 2+2?" + assert captured["text_pair"] == "CHEAP:\n4\n\nEXPENSIVE:\nfour" + assert 0.0 <= out.score <= 1.0 + + # Case 2: reference_answer=None -> empty EXPENSIVE: block. + captured.clear() + v.score("What is 2+2?", "4") + assert captured["text_pair"] == "CHEAP:\n4\n\nEXPENSIVE:\n" + + # Case 3: reference_answer is whitespace-only -> stripped to empty. + captured.clear() + v.score("What is 2+2?", "4", reference_answer=" \n ") + assert captured["text_pair"] == "CHEAP:\n4\n\nEXPENSIVE:\n" + + def test_trained_verifier_get_singleton_caches(): """The module-level singleton accessor should cache same-threshold calls and return fresh instances for mismatched thresholds. Construction