From 468a4483ef653adf2ed70b9945f47abdfefe7d43 Mon Sep 17 00:00:00 2001 From: Kevin Boyd Date: Fri, 24 Apr 2026 14:20:07 -0400 Subject: [PATCH 1/4] Expand cross-similarity benchmark to larger sizes and trim variants Replace the 100/1000/2000 molecule scan with a 2k-32k sweep driven by a shared fingerprint set generated once up front. Drop the multiprocess rdkit and nvmolkit CPU-collect benchmarks, keeping only the serial rdkit baseline and the nvmolkit GPU-only path. Cosine similarity is now opt-in via a --cosine flag so default runs only time Tanimoto. --- benchmarks/cross_similarity_bench.py | 113 ++++++++++----------------- 1 file changed, 43 insertions(+), 70 deletions(-) diff --git a/benchmarks/cross_similarity_bench.py b/benchmarks/cross_similarity_bench.py index 89150dba..5396de56 100644 --- a/benchmarks/cross_similarity_bench.py +++ b/benchmarks/cross_similarity_bench.py @@ -13,95 +13,68 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import argparse -import multiprocessing - import pandas as pd import pyperf import torch from rdkit.Chem import MolFromSmiles, rdFingerprintGenerator from rdkit.DataStructs import BulkCosineSimilarity, BulkTanimotoSimilarity -from nvmolkit.similarity import ( - crossCosineSimilarity, - crossTanimotoSimilarity, - crossCosineSimilarityMemoryConstrained, - crossTanimotoSimilarityMemoryConstrained, -) +from nvmolkit.similarity import crossCosineSimilarity, crossTanimotoSimilarity from nvmolkit.fingerprints import MorganFingerprintGenerator -df = pd.read_csv("data/benchmark_smiles.csv") -smis = df.iloc[:, 0].to_list()[:2000] -mols = [MolFromSmiles(smi) for smi in smis] - -runner = pyperf.Runner(min_time=0.01, values=3, processes=1, loops=3) -runner.metadata["description"] = f"Cross Similarity benchmark" +SIZES = [2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 20000, 24000, 28000, 32000] def rdkit_sim(fps, sim_type): if sim_type.lower() == "tanimoto": - sim = [BulkTanimotoSimilarity(fps[i], fps) for i in range(len(fps))] - elif sim_type.lower() == "cosine": - sim = [BulkCosineSimilarity(fps[i], fps) for i in range(len(fps))] - - -def _internal_mp(fps, idx, sim_type): - if sim_type.lower() == "tanimoto": - return BulkTanimotoSimilarity(fps[idx], fps) + [BulkTanimotoSimilarity(fps[i], fps) for i in range(len(fps))] elif sim_type.lower() == "cosine": - return BulkCosineSimilarity(fps[idx], fps) - - -def rdkit_sim_mp(fps, sim_type): - with multiprocessing.Pool(16) as pool: - sim = pool.starmap(_internal_mp, ([fps, i, sim_type] for i in range(len(fps)))) + [BulkCosineSimilarity(fps[i], fps) for i in range(len(fps))] def nvmolkit_sim_gpu_only(fps, sim_type): if sim_type.lower() == "tanimoto": - sim = crossTanimotoSimilarity(fps) + crossTanimotoSimilarity(fps) elif sim_type.lower() == "cosine": - sim = crossCosineSimilarity(fps) + crossCosineSimilarity(fps) torch.cuda.synchronize() -def nvmolkit_sim_cpu_collect(fps, sim_type): - if sim_type.lower() == "tanimoto": - out = 
crossTanimotoSimilarityMemoryConstrained(fps) - elif sim_type.lower() == "cosine": - out = crossCosineSimilarityMemoryConstrained(fps) - - -for sim_type in ("tanimoto", "cosine"): - for molNum in ( - 100, - 1000, - 2000, - ): - for fpsize in (1024,): - generator = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=fpsize) - fps = [generator.GetFingerprint(mol) for mol in mols[:molNum]] - while len(fps) < molNum: - fps += fps - fps = fps[:molNum] - - name = f"rdkit_{sim_type}sim_fpsize_{fpsize}_{molNum}mols" - runner.bench_func(name, rdkit_sim, fps, sim_type, metadata={"name": name}) - - name2 = f"rdkit_multiprocess_{sim_type}sim_fpsize_{fpsize}_{molNum}mols" - runner.bench_func(name2, rdkit_sim_mp, fps, sim_type, metadata={"name": str(name2)}) - - while len(mols) < molNum: - mols += mols - mols = mols[:molNum] - nvmolkit_fpgen = MorganFingerprintGenerator(radius=3, fpSize=fpsize) - - nvmolkit_fps_cu = torch.as_tensor(nvmolkit_fpgen.GetFingerprints(mols[:molNum]), device="cuda") - name3 = f"nvmolkit_gpu-only_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_gpu_only" - runner.bench_func(name3, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": str(name3)}) - - name4 = f"nvmolkit_cpu-collect_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_cpu_result" - runner.bench_func( - name4, nvmolkit_sim_cpu_collect, nvmolkit_fps_cu, sim_type, metadata={"name": str(name4)} - ) +runner = pyperf.Runner(min_time=0.01, values=3, processes=1, loops=3) +runner.metadata["description"] = "Cross Similarity benchmark" +runner.argparser.add_argument( + "--input", type=str, default="data/benchmark_smiles.csv", help="Path to input SMILES CSV file" +) +runner.argparser.add_argument("--cosine", action="store_true", help="Include cosine similarity benchmarks") +args = runner.parse_args() + +sim_types = ("tanimoto", "cosine") if args.cosine else ("tanimoto",) +fpsize = 1024 +max_size = max(SIZES) + +df = pd.read_csv(args.input) +smis = df.iloc[:, 0].to_list() +mols = 
[MolFromSmiles(smi) for smi in smis] +mols = [mol for mol in mols if mol is not None] +while len(mols) < max_size: + mols += mols +mols = mols[:max_size] + +rdkit_fpgen = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=fpsize) +rdkit_fps_all = [rdkit_fpgen.GetFingerprint(mol) for mol in mols] +nvmolkit_fpgen = MorganFingerprintGenerator(radius=3, fpSize=fpsize) +nvmolkit_fps_all = torch.as_tensor(nvmolkit_fpgen.GetFingerprints(mols), device="cuda") + +for sim_type in sim_types: + for molNum in SIZES: + fps = rdkit_fps_all[:molNum] + nvmolkit_fps_cu = nvmolkit_fps_all[:molNum].contiguous() + + name = f"rdkit_{sim_type}sim_fpsize_{fpsize}_{molNum}mols" + runner.bench_func(name, rdkit_sim, fps, sim_type, metadata={"name": name}) + + name2 = f"nvmolkit_gpu-only_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_gpu_only" + runner.bench_func( + name2, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": name2} + ) From 7ab3cec72ad7a9bcc341942cc89e18de6965ff6b Mon Sep 17 00:00:00 2001 From: Kevin Boyd Date: Fri, 24 Apr 2026 14:24:10 -0400 Subject: [PATCH 2/4] Formatting --- benchmarks/cross_similarity_bench.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmarks/cross_similarity_bench.py b/benchmarks/cross_similarity_bench.py index 5396de56..62441623 100644 --- a/benchmarks/cross_similarity_bench.py +++ b/benchmarks/cross_similarity_bench.py @@ -75,6 +75,4 @@ def nvmolkit_sim_gpu_only(fps, sim_type): runner.bench_func(name, rdkit_sim, fps, sim_type, metadata={"name": name}) name2 = f"nvmolkit_gpu-only_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_gpu_only" - runner.bench_func( - name2, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": name2} - ) + runner.bench_func(name2, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": name2}) From 70023dad79ed8596e91e4a9f91d47d3a341afd99 Mon Sep 17 00:00:00 2001 From: Kevin Boyd Date: Fri, 24 Apr 2026 14:30:41 -0400 Subject: [PATCH 3/4] Address greptile 
comment --- benchmarks/cross_similarity_bench.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benchmarks/cross_similarity_bench.py b/benchmarks/cross_similarity_bench.py index 62441623..89509c88 100644 --- a/benchmarks/cross_similarity_bench.py +++ b/benchmarks/cross_similarity_bench.py @@ -57,6 +57,8 @@ def nvmolkit_sim_gpu_only(fps, sim_type): smis = df.iloc[:, 0].to_list() mols = [MolFromSmiles(smi) for smi in smis] mols = [mol for mol in mols if mol is not None] +if not mols: + raise ValueError(f"No molecules parsed from {args.input}") while len(mols) < max_size: mols += mols mols = mols[:max_size] From 1d59817ad1712a7835d7760caf82f7de5678b6bb Mon Sep 17 00:00:00 2001 From: Kevin Boyd Date: Fri, 24 Apr 2026 14:38:46 -0400 Subject: [PATCH 4/4] Make slow CPU path singular --- benchmarks/cross_similarity_bench.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/benchmarks/cross_similarity_bench.py b/benchmarks/cross_similarity_bench.py index 89509c88..b26a3a7a 100644 --- a/benchmarks/cross_similarity_bench.py +++ b/benchmarks/cross_similarity_bench.py @@ -24,6 +24,7 @@ SIZES = [2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 20000, 24000, 28000, 32000] +CPU_SINGLE_VALUE_ABOVE = 6000 def rdkit_sim(fps, sim_type): @@ -52,6 +53,7 @@ def nvmolkit_sim_gpu_only(fps, sim_type): sim_types = ("tanimoto", "cosine") if args.cosine else ("tanimoto",) fpsize = 1024 max_size = max(SIZES) +default_values = runner.args.values df = pd.read_csv(args.input) smis = df.iloc[:, 0].to_list() @@ -73,8 +75,10 @@ def nvmolkit_sim_gpu_only(fps, sim_type): fps = rdkit_fps_all[:molNum] nvmolkit_fps_cu = nvmolkit_fps_all[:molNum].contiguous() + runner.args.values = 1 if molNum > CPU_SINGLE_VALUE_ABOVE else default_values name = f"rdkit_{sim_type}sim_fpsize_{fpsize}_{molNum}mols" runner.bench_func(name, rdkit_sim, fps, sim_type, metadata={"name": name}) + runner.args.values = default_values name2 = f"nvmolkit_gpu-only_{sim_type}sim_fpsize_{fpsize}_{molNum}mols_gpu_only" 
runner.bench_func(name2, nvmolkit_sim_gpu_only, nvmolkit_fps_cu, sim_type, metadata={"name": name2})