delftdata · kPsarakis · May 5, 2026 · May 4, 2026 · May 4, 2026 · May 5, 2026
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -8,35 +8,54 @@ on:
 
 jobs:
   bench:
-    runs-on: ubuntu-latest
-    timeout-minutes: 10
+    name: bench (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 15
+
+    strategy:
+      # Let every OS finish so one platform's failure does not hide the others.
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ['3.14']
 
     steps:
       - uses: actions/checkout@v6.0.2
 
       - name: Set up Python
         uses: actions/setup-python@v6.2.0
         with:
-          python-version: '3.14'
+          python-version: ${{ matrix.python-version }}
           cache: 'pip'
 
+      - name: Set deterministic environment
+        # windows-latest defaults to PowerShell, where $GITHUB_ENV is not an
+        # environment variable reference; force bash so the redirects work on
+        # every runner in the matrix.
+        shell: bash
+        run: |
+          echo "OMP_NUM_THREADS=1" >> "$GITHUB_ENV"
+          echo "MKL_NUM_THREADS=1" >> "$GITHUB_ENV"
+          echo "PYTHONHASHSEED=0" >> "$GITHUB_ENV"
+
       - name: Install package
         run: |
           python -m pip install --upgrade pip
           pip install .
 
       - name: Run accuracy regression check
+        shell: bash
         run: |
           python experiments/bench.py \
             --quick \
-            --output bench_results.json \
+            --output bench_results_${{ matrix.os }}.json \
             --baseline experiments/bench_baseline.json \
             --accuracy-only
 
       - name: Upload bench results
         if: always()
         uses: actions/upload-artifact@v4
         with:
-          name: bench-results
-          path: bench_results.json
-          if-no-files-found: warn
+          name: bench-results-${{ matrix.os }}
+          path: bench_results_${{ matrix.os }}.json
+          if-no-files-found: warn
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -29,6 +29,7 @@ jobs:
           python -m pip install --upgrade pip
           pip install .
           pip install ".[polars]" || true
+          pip install ".[embeddings]"
           pip install pytest==9.0.2 coverage==7.13.5 ruff==0.15.12
 
       - name: Ruff lint (must pass)

diff --git a/.github/workflows/ci-build-test-publish.yml b/.github/workflows/ci-build-test-publish.yml
@@ -61,12 +61,13 @@ jobs:
           python -m pip install --upgrade pip
           pip install .
           pip install ".[polars]" || true
+          pip install ".[embeddings]"
 
       - name: Install test deps
         run: pip install pytest==9.0.2
 
-      - name: Run tests (unittest)
-        run: python -m unittest discover tests
+      - name: Run tests
+        run: python -m pytest -q tests
 
   # 3) Test the built wheel on all OS/Python
   test_wheel:
@@ -106,12 +107,13 @@ jobs:
             pip install dist/*.tar.gz
           fi
           pip install polars || true
+          pip install "sentence-transformers>=2.0,<6.0"
 
       - name: Install test deps
         run: pip install pytest==9.0.2
 
       - name: Run tests (against installed wheel)
-        run: python -m unittest discover tests
+        run: python -m pytest -q tests
 
   # 4) Publish to PyPI on Release (tag publish event)
   publish-to-pypi:

diff --git a/README.md b/README.md
@@ -63,6 +63,12 @@ To enable **Polars** support, install the optional extra:
 pip install valentine[polars]
 ```
 
+To enable the **sentence-transformer embedding** distance for `JaccardDistanceMatcher` (see below), install:
+
+```shell
+pip install valentine[embeddings]
+```
+
 
 ## Usage
 Valentine can be used to find matches among columns of a given pair of pandas or Polars DataFrames. You can even mix pandas and Polars frames in the same call — Valentine auto-detects the frame type.
@@ -89,17 +95,21 @@ In order to do so, the user can choose one of the following matching methods:
           *    **threshold1**(*float*) - The threshold for phase 1 of the method, default is 0.15.
           *    **threshold2**(*float*) - The threshold for phase 2 of the method, default is 0.15.
 
-4.   `JaccardDistanceMatcher(float: threshold_dist)` is a baseline method that uses Jaccard Similarity between columns to assess their correspondence score, optionally enhanced by a string similarity measure of choice.
+4.   `JaccardDistanceMatcher(...)` is a baseline method that scores column pairs by **Tversky** similarity over their value sets (Jaccard by default). Element equality between values can be decided by a configurable string distance function, including a sentence-transformer **embedding** option for semantic matching.
      *    **Parameters**:
-          *    **threshold_dist**(*float*) - Acceptance threshold for assessing two strings as equal, default is 0.8.
-
-          *    **distance_fun**(*StringDistanceFunction*) - String similarity function used to assess whether two strings are equal. The enumeration class type `StringDistanceFunction` can be imported from `valentine.algorithms.jaccard_distance`. Functions currently supported are:
-   		       * `StringDistanceFunction.Levenshtein`: [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance)
+          *    **threshold_dist**(*float*) - Acceptance threshold above which two values are considered equal under the chosen `distance_fun`, default is 0.8. For embeddings, ~0.7 is a typical operating point.
+          *    **distance_fun**(*StringDistanceFunction*) - Per-value similarity function. The enumeration class `StringDistanceFunction` can be imported from `valentine.algorithms.jaccard_distance`. Functions currently supported are:
+   		       * `StringDistanceFunction.Levenshtein`: [Levenshtein distance](https://en.wikipedia.org/wiki/Levenshtein_distance) (default)
                * `StringDistanceFunction.DamerauLevenshtein`: [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance)
                * `StringDistanceFunction.Hamming`: [Hamming distance](https://en.wikipedia.org/wiki/Hamming_distance)
                * `StringDistanceFunction.Jaro`: [Jaro distance](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance)
                * `StringDistanceFunction.JaroWinkler`: [Jaro-Winkler distance](https://en.wikipedia.org/wiki/Jaro%E2%80%93Winkler_distance)
-              * `StringDistanceFunction.Exact`: String equality `==`
+               * `StringDistanceFunction.Exact`: String equality `==`
+               * `StringDistanceFunction.Embedding`: cosine similarity on sentence-transformer embeddings (requires the `valentine[embeddings]` extra)
+          *    **tversky_alpha**(*float*) / **tversky_beta**(*float*) - Tversky penalty weights for unmatched values on each side (defaults `1.0`, `1.0`). Defaults give Jaccard; `0.5, 0.5` gives Sørensen-Dice; `1.0, 0.0` (or vice versa) gives set containment — useful when one column is expected to be a subset of the other.
+          *    **embedding_model**(*str*) - Sentence-transformers model name when `distance_fun=Embedding` (default `"all-MiniLM-L6-v2"`).
+          *    **embedding_device**(*str* or *None*) - Device override (`"cpu"`, `"cuda"`, `"mps"`). `None` (default) auto-picks: cuda → mps → cpu.
+          *    **embedding_batch_size**(*int* or *None*) - Encode batch size; `None` uses the sentence-transformers default (32). Larger values amortise per-call overhead on capable hardware.
 
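-5.   `SimilarityFlooding(Policy: coeff_policy, Formula: formula, StringMatcher: string_matcher)` is the python implementation of the paper [Similarity Flooding: A Versatile Graph Matching Algorithmand its Application to Schema Matching](https://ieeexplore.ieee.org/document/994702)
+5.   `SimilarityFlooding(Policy: coeff_policy, Formula: formula, StringMatcher: string_matcher)` is the python implementation of the paper [Similarity Flooding: A Versatile Graph Matching Algorithm and its Application to Schema Matching](https://ieeexplore.ieee.org/document/994702)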
      * **Parameters**:
@@ -137,11 +147,17 @@ for pair, score in matches.items():
 ```python
 top_n_matches = matches.take_top_n(5)
 top_n_percent_matches = matches.take_top_percent(25)
-one_to_one_matches = matches.one_to_one()
 high_confidence = matches.filter(min_score=0.7)
-one_to_one_strict = matches.one_to_one(threshold=0.5)
+
+# One-to-one selectors — three flavours, pick the one that fits your task:
+one_to_one_matches = matches.one_to_one_hungarian()           # globally optimal (default)
+one_to_one_strict = matches.one_to_one_hungarian(threshold=0.5)
+greedy_legacy = matches.one_to_one_greedy()                   # legacy greedy assignment
+mutual_only = matches.one_to_one_mutual_top(n=1)              # mutual nearest neighbour
 ```
 
+`one_to_one_hungarian` (Hungarian assignment via `scipy.optimize.linear_sum_assignment`) is the recommended default and is what `Precision` / `Recall` / `F1Score` apply when their `one_to_one` flag is set. `one_to_one_greedy` preserves the legacy greedy behaviour for backwards compatibility. `one_to_one_mutual_top(n)` keeps a pair only when each side ranks the other in its top-`n` — a high-precision filter that drops one-sided affinities.
+
 ### Match details (Coma)
 
 When using the Coma matcher, per-sub-matcher score breakdowns are available via `.details`:
@@ -175,6 +191,15 @@ metrics_custom = matches.get_metrics(ground_truth, metrics={F1Score(one_to_one=F
 metrics_predefined_set = matches.get_metrics(ground_truth, metrics=METRICS_PRECISION_INCREASING_N)
 ```
 
+The 1:1 selection algorithm used when a metric's `one_to_one` flag is `True` can be overridden per call (default `"hungarian"`):
+
+```python
+metrics_strict   = matches.get_metrics(ground_truth, metrics={F1Score()}, one_to_one_method="mutual_top")
+metrics_legacy   = matches.get_metrics(ground_truth, metrics={F1Score()}, one_to_one_method="greedy")
+```
+
+Valid values are `"hungarian"` (default), `"greedy"`, and `"mutual_top"`. Metrics whose `one_to_one` flag is `False` (e.g. `MeanReciprocalRank`, `RecallAtSizeofGroundTruth`) ignore the argument.
+
 
 ### Example
 The following block of code shows: 1) how to run a matcher from Valentine on two DataFrames storing information about job candidates, and then 2) how to assess its effectiveness based on a given ground truth. More examples are available in the [`examples/`](https://github.com/delftdata/valentine/tree/master/examples) directory, including a [pandas example](https://github.com/delftdata/valentine/blob/master/examples/valentine_example_pandas.py), a [Polars example](https://github.com/delftdata/valentine/blob/master/examples/valentine_example_polars.py), and a [mixed pandas+Polars example](https://github.com/delftdata/valentine/blob/master/examples/valentine_example_mixed.py).

diff --git a/docs/api.md b/docs/api.md
@@ -141,7 +141,8 @@ class MatcherResults(Mapping[ColumnPair, float]):
 Immutable `Mapping` returned by [`valentine_match`](#valentine_match).
 Entries are sorted from highest to lowest similarity score on
 construction. Because the mapping is immutable, derived views (such as
-the cached result of [`one_to_one`](#one_to_one)) cannot be silently
+the cached result of [`one_to_one_hungarian`](#one_to_one_hungarian))
+cannot be silently
 invalidated.
 
 ### Mapping protocol
@@ -186,22 +187,48 @@ All transformations return a **new** `MatcherResults` instance; the
 original is left untouched. Sub-matcher details are carried over to the
 filtered subset.
 
-#### `one_to_one`
+#### `one_to_one_hungarian`
 
 ```python
-def one_to_one(threshold: float | None = None) -> MatcherResults
+def one_to_one_hungarian(threshold: float | None = None) -> MatcherResults
 ```
 
-Greedy bipartite filter: starting from the highest-scoring pair, assign
-each source and each target column **at most one** partner. Pairs below
-`threshold` are discarded.
+Default 1:1 selector. Globally optimal bipartite filter via Hungarian
+assignment (`scipy.optimize.linear_sum_assignment`): each source and
+each target column appears in **at most one** returned pair, with the
+assignment chosen to maximise total similarity. Pairs below `threshold`
+are discarded.
 
 - `threshold=None` (default) uses the median of unique similarity scores
   as the cutoff, and the result is cached.
 - Passing an explicit `threshold` bypasses the cache.
 - When the input has fewer than two distinct score values, all entries
   are returned unchanged.
 
+#### `one_to_one_greedy`
+
+```python
+def one_to_one_greedy(threshold: float | None = None) -> MatcherResults
+```
+
+Greedy bipartite filter, kept for backwards compatibility. Starting
+from the highest-scoring pair, greedily assigns each source and each
+target column at most one partner. Same threshold semantics as
+`one_to_one_hungarian`. Greedy can lock in a locally-best pair that
+blocks a better global assignment, so prefer the Hungarian variant
+unless you need the legacy behaviour.
+
+#### `one_to_one_mutual_top`
+
+```python
+def one_to_one_mutual_top(n: int = 1) -> MatcherResults
+```
+
+Mutual top-`n` filter: keeps pair `(s, t)` only if `t` is in `s`'s
+top-`n` targets AND `s` is in `t`'s top-`n` sources. With `n=1` this
+is the classic mutual nearest-neighbour filter — high-precision, drops
+one-sided affinities. Typically more selective than `one_to_one_hungarian`.
+
 #### `filter`
 
 ```python
@@ -615,7 +642,7 @@ Precision(one_to_one: bool = True)
 ```
 
 `TP / (TP + FP)`. When `one_to_one=True` (default), applies
-`MatcherResults.one_to_one()` before counting.
+`MatcherResults.one_to_one_hungarian()` before counting.
 
 #### `Recall`
 

diff --git a/docs/changelog.md b/docs/changelog.md
@@ -84,7 +84,7 @@ coming from 0.5.x or earlier, the changes below will affect your code.
   `valentine_match` / `valentine_match_batch` pair.
 - **Immutable [`MatcherResults`](api.md#matcherresults).** The result
   object is now a `Mapping`, not a `dict` subclass. Derived views
-  (e.g. [`one_to_one()`](api.md#one_to_one)) are cached and cannot be
+  (e.g. [`one_to_one_hungarian()`](api.md#one_to_one_hungarian)) are cached and cannot be
   silently invalidated.
 - [`Coma`](api.md#coma) is now a pure-Python implementation of
   COMA 3.0 — no JVM dependency. Constructor signature updated to

diff --git a/docs/example.md b/docs/example.md
@@ -60,7 +60,7 @@ def main():
 
-    # 4. Reduce to one-to-one matches (greedy, highest-first).
+    # 4. Reduce to one-to-one matches (globally optimal Hungarian assignment).
     print("\nGetting the one-to-one matches:")
-    pp.pprint(matches.one_to_one())
+    pp.pprint(matches.one_to_one_hungarian())
 
     # 5. If you have a ground truth, compute evaluation metrics.
     ground_truth = [

diff --git a/docs/faq.md b/docs/faq.md
@@ -95,7 +95,7 @@ provides three reduction helpers:
 ```python
 matches.take_top_n(10)              # absolute top 10
 matches.take_top_percent(5)         # top 5%
-matches.one_to_one()                # bidirectional best matches
+matches.one_to_one_hungarian()      # globally optimal one-to-one matches
 ```
 
 All three return a new `MatcherResults` — the original is immutable.

diff --git a/docs/metrics.md b/docs/metrics.md
@@ -71,7 +71,7 @@ from valentine.metrics import (
 
 `Precision`, `Recall`, `F1Score` and `PrecisionTopNPercent` all accept a
 `one_to_one: bool` flag that applies
-[`MatcherResults.one_to_one()`](api.md#one_to_one) before counting.
+[`MatcherResults.one_to_one_hungarian()`](api.md#one_to_one_hungarian) before counting.
 `PrecisionTopNPercent` additionally takes `n: int` for the cutoff, and
 `RecallAtSizeofGroundTruth` defaults to `one_to_one=False`. See the
 [API reference](api.md#built-in-metrics) for full defaults.

diff --git a/docs/results.md b/docs/results.md
@@ -10,7 +10,7 @@ mapping** of [`ColumnPair`](api.md#columnpair) keys to similarity
 scores, sorted from highest score to lowest. It behaves like a `dict`
 for lookup and iteration, but cannot be mutated (preventing accidental
 invalidation of cached derived views such as
-[`one_to_one()`](api.md#one_to_one)).
+[`one_to_one_hungarian()`](api.md#one_to_one_hungarian)).
 
 For the authoritative method signatures, see the API reference for
 [`MatcherResults`](api.md#matcherresults) and
@@ -82,24 +82,24 @@ strong = matches.filter(min_score=0.7)
 
-# Reduce to one-to-one matches (greedy, highest-first). Threshold defaults
+# Reduce to one-to-one matches (Hungarian assignment). Threshold defaults
 # to the median score of the current results.
-one_to_one = matches.one_to_one()
+one_to_one = matches.one_to_one_hungarian()
 
 # Override the threshold to be stricter
-strict = matches.one_to_one(threshold=0.8)
+strict = matches.one_to_one_hungarian(threshold=0.8)
 ```
 
 Each method is documented in full in the API reference:
 [`take_top_n`](api.md#take_top_n),
 [`take_top_percent`](api.md#take_top_percent),
 [`filter`](api.md#filter), and
-[`one_to_one`](api.md#one_to_one).
+[`one_to_one_hungarian`](api.md#one_to_one_hungarian).
 
 Every transformation returns a **new**
 [`MatcherResults`](api.md#matcherresults) instance, so you can chain
 them:
 
 ```python
-best_strict_pairs = matches.filter(min_score=0.5).one_to_one(threshold=0.7)
+best_strict_pairs = matches.filter(min_score=0.5).one_to_one_hungarian(threshold=0.7)
 ```
 
 !!! tip "Details propagation"

diff --git a/examples/valentine_example_mixed.py b/examples/valentine_example_mixed.py
@@ -37,7 +37,7 @@ def main():
         print(f"  {pair.source_column:>20s} <-> {pair.target_column:<20s}  {score:.4f}")
 
     print("\nOne-to-one matches:")
-    for pair, score in matches.one_to_one().items():
+    for pair, score in matches.one_to_one_hungarian().items():
         print(f"  {pair.source_column:>20s} <-> {pair.target_column:<20s}  {score:.4f}")
 
     # Evaluate against ground truth

diff --git a/examples/valentine_example_pandas.py b/examples/valentine_example_pandas.py
@@ -35,7 +35,7 @@ def main():
             print(f"  {'':>20s}      [{breakdown}]")
 
     print("\nGetting the one-to-one matches:")
-    pp.pprint(matches.one_to_one())
+    pp.pprint(matches.one_to_one_hungarian())
 
     # If ground truth available valentine could calculate the metrics
     ground_truth = [

diff --git a/examples/valentine_example_polars.py b/examples/valentine_example_polars.py
@@ -35,7 +35,7 @@ def main():
             print(f"  {'':>20s}      [{breakdown}]")
 
     print("\nGetting the one-to-one matches:")
-    pp.pprint(matches.one_to_one())
+    pp.pprint(matches.one_to_one_hungarian())
 
     # If ground truth available valentine could calculate the metrics
     ground_truth = [