From 205839da5091b2a29588a1bf713cd6f3cb9c2c2f Mon Sep 17 00:00:00 2001
From: Taksh
Date: Thu, 9 Apr 2026 15:36:39 +0530
Subject: [PATCH] Fix set comprehension in per_class_scorer causing wrong overall metrics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The overall precision/recall/F1 computation uses set comprehensions
({v for ...}) instead of list comprehensions ([v for ...]). Sets
deduplicate values, so if two entity types share the same TP/FP/FN
count, only one copy is summed, silently producing incorrect overall
metrics.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 scispacy/per_class_scorer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scispacy/per_class_scorer.py b/scispacy/per_class_scorer.py
index 57993b0b..58d5a60e 100644
--- a/scispacy/per_class_scorer.py
+++ b/scispacy/per_class_scorer.py
@@ -69,13 +69,13 @@ def get_metric(self, reset: bool = False):
         # Compute the precision, recall and f1 for all spans jointly.
         sum_true_positives = sum(
-            {v for k, v in self._true_positives.items() if k != "untyped"}
+            [v for k, v in self._true_positives.items() if k != "untyped"]
         )
         sum_false_positives = sum(
-            {v for k, v in self._false_positives.items() if k != "untyped"}
+            [v for k, v in self._false_positives.items() if k != "untyped"]
         )
         sum_false_negatives = sum(
-            {v for k, v in self._false_negatives.items() if k != "untyped"}
+            [v for k, v in self._false_negatives.items() if k != "untyped"]
         )
         precision, recall, f1_measure = self._compute_metrics(
             sum_true_positives, sum_false_positives, sum_false_negatives
         )
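
For reviewers, a minimal sketch of the failure mode. The entity type
names and counts below are hypothetical, chosen only so that two types
share the same count:

    # Hypothetical counts: two entity types have the same TP count.
    true_positives = {"DISEASE": 10, "CHEMICAL": 10, "untyped": 20}

    # Buggy: the set collapses the duplicate counts into a single value.
    buggy = sum({v for k, v in true_positives.items() if k != "untyped"})

    # Fixed: the list keeps one entry per entity type.
    fixed = sum([v for k, v in true_positives.items() if k != "untyped"])

    print(buggy)  # 10
    print(fixed)  # 20

A bare generator expression, sum(v for k, v in ... if k != "untyped"),
would also be correct and avoid the intermediate list; the list
comprehension is used here as the minimal change from the existing code.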