NVIDIA-NeMo · SandyChapman · Apr 29, 2026 · May 4, 2026 · SandyChapman · May 4, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,15 @@
 
 ## 0.13.0 (unreleased)
 
+### Shared Metric Contract
+
+- Added public `MetricInput -> MetricResult` scorer/metric runtime types and `ScorerFunctionMetric`.
+- Extended BYOB `@scorer` with typed scorer metadata and `to_metric()` while preserving current dict scorer behavior.
+- Added optional `config_schema` support for typed scorer configs while keeping raw dict configs as the default.
+- Split typed scorer config binding into strict `bind(config=ConfigModel(...))` and coercive `bind_raw_config(config={...})` paths.
+- Added `@scorer` support for class-based `Metric` objects.
+- Added a reusable undecorated `ExactMatchMetric` and an `ExactMatchScorer` BYOB wrapper.
+
 ### Adapter Proxy (Breaking — replaces LiteLLM)
 
 - **LiteLLM removed**: The `litellm` dependency, `proxy` and `proxy-full` extras, and `litellm_settings` config field are all removed. The adapter proxy is now built-in with zero external proxy dependencies.

diff --git a/examples/benchmarks/exact_match_metric_poc.py b/examples/benchmarks/exact_match_metric_poc.py
@@ -0,0 +1,59 @@
+# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Example BYOB benchmark using a class-based Metric as its scorer."""
+
+from nemo_evaluator.environments.custom import benchmark, scorer
+from nemo_evaluator.metrics import ExactMatchMetric
+from nemo_evaluator.scorers import ExactMatchScorer
+from nemo_evaluator.scoring import MetricInput, MetricOutput, MetricOutputSpec, MetricResult
+
+
+def _dataset() -> list[dict[str, str]]:
+    """Return the two in-memory question/answer rows used by every benchmark in this example."""
+    return [
+        {"question": "What is the capital of France?", "answer": "Paris"},
+        {"question": "What is 2 + 2?", "answer": "4"},
+    ]
+
+
+# Mode 1: use a preannotated scorer wrapper exported by the OSS scorer layer.
+# ``ExactMatchScorer`` is imported from ``nemo_evaluator.scorers`` and configured with a
+# reference template; the configured object is passed directly to the callable returned
+# by ``benchmark(...)``, without going through ``scorer(...)`` at this call site.
+# The ``{question}`` prompt and the ``answer`` target field use the same key names as the
+# rows returned by ``_dataset``.
+benchmark(
+    name="exact-match-preannotated-scorer-poc",
+    dataset=_dataset,
+    prompt="{question}",
+    target_field="answer",
+)(ExactMatchScorer(reference="{{item.answer}}"))
+
+
+class InlineExactMatchMetric:
+    """Locally defined exact-match metric used by the "Mode 2" benchmark in this example.
+
+    Produces a single output named ``correct`` whose value is ``1.0`` when the candidate
+    text equals the reference and ``0.0`` otherwise. The comparison is a plain ``==``;
+    this class applies no stripping or case normalization.
+    """
+
+    # Identifier string for this metric.
+    # NOTE(review): presumably read by the scorer/metric machinery - confirm against the Metric contract.
+    type = "inline-exact-match"
+
+    def __init__(self, *, reference: str, candidate: str | None = None) -> None:
+        """Store the comparison settings.
+
+        Args:
+            reference: Expected answer. The exact string ``"{{item.answer}}"`` is treated
+                as a placeholder and resolved per row in ``compute_scores``; any other
+                value is compared as-is.
+            candidate: Optional fixed candidate text. When ``None``, the ``output_text``
+                of the input's candidate is used instead.
+        """
+        self.reference = reference
+        self.candidate = candidate
+
+    def output_spec(self) -> list[MetricOutputSpec]:
+        """Declare the single continuous-score output, named ``correct``."""
+        return [MetricOutputSpec.continuous_score("correct")]
+
+    async def compute_scores(self, input: MetricInput) -> MetricResult:
+        """Score one input and return a ``MetricResult`` holding the ``correct`` output."""
+        # Only the literal placeholder "{{item.answer}}" is recognised here; it is not rendered
+        # as a general template, it just selects the row's "answer" field (None if absent).
+        reference = input.row.data.get("answer") if self.reference == "{{item.answer}}" else self.reference
+        # An explicitly configured candidate takes precedence over the candidate's output text.
+        candidate = input.candidate.output_text if self.candidate is None else self.candidate
+        correct = 1.0 if candidate == reference else 0.0
+        return MetricResult(outputs=[MetricOutput(name="correct", value=correct)])
+
+
+# Mode 2: annotate a local class at the benchmark call site, then configure it.
+# ``scorer(...)`` is applied to the ``InlineExactMatchMetric`` class itself; the object it
+# returns is then called with ``reference=...`` (the keyword accepted by the class's
+# ``__init__``), and that result is passed to the callable returned by ``benchmark(...)``.
+benchmark(
+    name="exact-match-inline-class-scorer-poc",
+    dataset=_dataset,
+    prompt="{question}",
+    target_field="answer",
+)(scorer(InlineExactMatchMetric)(reference="{{item.answer}}"))
+
+
+# Mode 3: adapt an already-configured reusable metric instance at the call site.
+# Unlike Mode 2, ``ExactMatchMetric`` is instantiated first and ``scorer(...)`` is applied
+# to the resulting instance rather than to a class; the return value of ``scorer(...)`` is
+# passed to the callable returned by ``benchmark(...)``.
+benchmark(
+    name="exact-match-metric-instance-poc",
+    dataset=_dataset,
+    prompt="{question}",
+    target_field="answer",
+)(scorer(ExactMatchMetric(reference="{{item.answer}}")))
diff --git a/src/nemo_evaluator/__init__.py b/src/nemo_evaluator/__init__.py
@@ -16,22 +16,32 @@
 
 __version__ = "0.12.0"
 
+from nemo_evaluator.engine.eval_loop import run_evaluation
+from nemo_evaluator.engine.model_client import ModelClient
 from nemo_evaluator.environments.base import EvalEnvironment, SeedResult, VerifyResult
 from nemo_evaluator.environments.custom import benchmark, scorer
 from nemo_evaluator.environments.registry import get_environment, list_environments, load_benchmark_file, register
-from nemo_evaluator.engine.eval_loop import run_evaluation
-from nemo_evaluator.engine.model_client import ModelClient
-from nemo_evaluator.solvers import (
-    ChatSolver,
-    CompletionSolver,
-    NatSolver,
-    OpenClawSolver,
-    Solver,
-    SolveResult,
-    VLMSolver,
-)
+from nemo_evaluator.metrics import ExactMatchMetric
+from nemo_evaluator.scorers import ExactMatchScorer
 from nemo_evaluator.scoring import (
+    BooleanValue,
+    CandidateOutput,
+    ContinuousScore,
+    DatasetRow,
+    DiscreteScore,
+    Label,
+    Metric,
+    MetricDescriptor,
+    MetricInput,
+    MetricOutput,
+    MetricOutputSpec,
+    MetricResult,
+    MetricScorerFunction,
+    ScorerCallable,
+    ScorerConfig,
+    ScorerFunctionMetric,
     ScorerInput,
+    ScorerReturn,
     answer_line,
     code_sandbox,
     code_sandbox_async,
@@ -40,6 +50,16 @@
     multichoice_regex,
     needs_judge,
     numeric_match,
+    score_names_from_output_spec,
+)
+from nemo_evaluator.solvers import (
+    ChatSolver,
+    CompletionSolver,
+    NatSolver,
+    OpenClawSolver,
+    Solver,
+    SolveResult,
+    VLMSolver,
 )
 
 __all__ = [
@@ -65,6 +85,26 @@
     "benchmark",
     "scorer",
     "ScorerInput",
+    "ExactMatchMetric",
+    "ExactMatchScorer",
+    "Metric",
+    "BooleanValue",
+    "DatasetRow",
+    "CandidateOutput",
+    "ContinuousScore",
+    "DiscreteScore",
+    "Label",
+    "MetricInput",
+    "MetricOutput",
+    "MetricOutputSpec",
+    "MetricDescriptor",
+    "MetricResult",
+    "MetricScorerFunction",
+    "ScorerCallable",
+    "ScorerConfig",
+    "ScorerFunctionMetric",
+    "ScorerReturn",
+    "score_names_from_output_spec",
     # Scoring primitives
     "exact_match",
     "multichoice_regex",