From 205839da5091b2a29588a1bf713cd6f3cb9c2c2f Mon Sep 17 00:00:00 2001
From: Taksh
Date: Thu, 9 Apr 2026 15:36:39 +0530
Subject: [PATCH] Fix set comprehension in per_class_scorer causing wrong overall metrics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The overall precision/recall/F1 computation uses set comprehensions
({v for ...}) instead of list comprehensions ([v for ...]). Sets
deduplicate values, so if two entity types share the same TP/FP/FN
count, only one copy is summed, silently producing incorrect overall
metrics.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scispacy/per_class_scorer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scispacy/per_class_scorer.py b/scispacy/per_class_scorer.py
index 57993b0b..58d5a60e 100644
--- a/scispacy/per_class_scorer.py
+++ b/scispacy/per_class_scorer.py
@@ -69,13 +69,13 @@ def get_metric(self, reset: bool = False):
         # Compute the precision, recall and f1 for all spans jointly.
         sum_true_positives = sum(
-            {v for k, v in self._true_positives.items() if k != "untyped"}
+            [v for k, v in self._true_positives.items() if k != "untyped"]
         )
         sum_false_positives = sum(
-            {v for k, v in self._false_positives.items() if k != "untyped"}
+            [v for k, v in self._false_positives.items() if k != "untyped"]
         )
         sum_false_negatives = sum(
-            {v for k, v in self._false_negatives.items() if k != "untyped"}
+            [v for k, v in self._false_negatives.items() if k != "untyped"]
         )
         precision, recall, f1_measure = self._compute_metrics(
             sum_true_positives, sum_false_positives, sum_false_negatives
         )
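
For reviewers, a minimal sketch of the failure mode. The entity type
names and counts below are hypothetical, chosen only so that two types
share the same count:

    # Hypothetical counts: two entity types have the same TP count.
    true_positives = {"DISEASE": 10, "CHEMICAL": 10, "untyped": 20}

    # Buggy: the set collapses the duplicate counts into a single value.
    buggy = sum({v for k, v in true_positives.items() if k != "untyped"})

    # Fixed: the list keeps one entry per entity type.
    fixed = sum([v for k, v in true_positives.items() if k != "untyped"])

    print(buggy)  # 10
    print(fixed)  # 20

A bare generator expression, sum(v for k, v in ... if k != "untyped"),
would also be correct and avoid the intermediate list; the list
comprehension is used here as the minimal change from the existing code.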