From 8044296c8bc09583d5656f4041b510b602cc00d3 Mon Sep 17 00:00:00 2001 From: Arne Lammers Date: Thu, 4 Jun 2026 11:46:53 +0200 Subject: [PATCH] [add]: added allowed-missing-percentage parameter including docs --- README.md | 1 + specreboot/run_workflow_matchms.py | 11 ++++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 46f496c..0fec7f7 100644 --- a/README.md +++ b/README.md @@ -203,6 +203,7 @@ Example — multiple selected metrics: | `--batch-size` | `10` | Replicates per thread-pool batch | | `--sim-threshold` | `0.7` | Mean similarity threshold for cosine/modcosine/spec2vec graphs | | `--sim-threshold-ms2dp` | `0.8` | Mean similarity threshold for MS2DeepScore graphs | +| `--spec2vec-allowed-missing-percentage` | `5.0` | Maximum percentage of a spectrum's peaks that may be missing from the Spec2Vec model vocabulary before the similarity calculation fails | | `--support-threshold` | `0.5` | Minimum edge support for threshold graph | | `--max-component-size` | `100` | Maximum connected-component size | | `--max-links` | `None` | Maximum edges per node; each node keeps only its top-N neighbours by mean similarity. `None` disables the filter | diff --git a/specreboot/run_workflow_matchms.py b/specreboot/run_workflow_matchms.py index 4cee4d6..5e3400f 100644 --- a/specreboot/run_workflow_matchms.py +++ b/specreboot/run_workflow_matchms.py @@ -149,6 +149,15 @@ def build_parser(p: argparse.ArgumentParser): "MS2DeepScore often uses a higher cutoff than cosine-based scores." ), ) + p.add_argument( + "--spec2vec-allowed-missing-percentage", + type=float, + default=5.0, + help=( + "Maximum percentage of missing peaks allowed in spec2vec similarity calculation. " + "Prevents failed similarity calculations due to vocabulary gaps, particularly in specific cases involving self-trained models."
+ ), + ) p.add_argument( "--tolerance", type=float, @@ -358,7 +367,7 @@ def run(args): similarity_objs["Spec2Vec"] = Spec2Vec( model=w2v, intensity_weighting_power=0.5, - allowed_missing_percentage=5.0, + allowed_missing_percentage=args.spec2vec_allowed_missing_percentage, ) if "ms2deepscore" in sim_keys: