From 9aaad7842032b555d7371d81f9eb8a5f0ec63862 Mon Sep 17 00:00:00 2001
From: Yi Su <90744702+suu-yi@users.noreply.github.com>
Date: Wed, 8 Apr 2026 14:50:51 +0200
Subject: [PATCH 1/6] check genes lost from mapping

---
 cytetype/preprocessing/extraction.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/cytetype/preprocessing/extraction.py b/cytetype/preprocessing/extraction.py
index 44f9c9c..b6d00e1 100644
--- a/cytetype/preprocessing/extraction.py
+++ b/cytetype/preprocessing/extraction.py
@@ -78,10 +78,18 @@ def extract_marker_genes(
             gene_ids_to_name[gene] for gene in top_genes if gene in gene_ids_to_name
         ]
 
-    if not any_genes_found:
+        lost_genes = len(top_genes) - len(markers[cluster_id])
+        if lost_genes > 0:
+            logger.warning(
+                f"Number of lost genes ({lost_genes}) for group '{group_name}' (cluster '{cluster_id}'). \n"
+                f"This could indicate inconsistencies with the marker genes and the genes in adata.var."
+            )
+
+    if not any(markers.values()):
         raise ValueError(
-            "No marker genes found for any group. This could indicate issues with the "
-            "rank_genes_groups analysis or that all groups have insufficient marker genes."
+            "All marker gene lists are empty. Gene names in rank_genes_groups "
+            "could not be matched to adata.var_names. This typically happens "
+            "when var_names were changed after rank_genes_groups was run."
         )
 
     return markers

From 29caf56d7efb67a6a1a304ec4ed314ed51d3b03d Mon Sep 17 00:00:00 2001
From: Yi Su <90744702+suu-yi@users.noreply.github.com>
Date: Wed, 8 Apr 2026 14:51:11 +0200
Subject: [PATCH 2/6] Warn when rank_genes_groups names look like gene IDs

---
 cytetype/preprocessing/validation.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/cytetype/preprocessing/validation.py b/cytetype/preprocessing/validation.py
index d458848..7d8794b 100644
--- a/cytetype/preprocessing/validation.py
+++ b/cytetype/preprocessing/validation.py
@@ -338,6 +338,28 @@ def validate_adata(
             f"'names' field in `adata.uns['{rank_genes_key}']` is missing or invalid."
         )
 
+    rank_names = adata.uns[rank_genes_key]["names"]
+    try:
+        sample_genes = [
+            str(g)
+            for field in rank_names.dtype.names[:3]
+            for g in rank_names[field][:20]
+        ]
+    except Exception:
+        sample_genes = []
+
+    if sample_genes:
+        id_pct = _id_like_percentage(sample_genes)
+        if id_pct > 50:
+            examples = [g for g in sample_genes if _is_gene_id_like(g)][:3]
+            logger.warning(
+                f"rank_genes_groups results contain gene IDs rather than gene symbols "
+                f"(e.g. {examples}). This typically happens when var_names were Ensembl "
+                f"IDs at the time rank_genes_groups was run but have since been replaced "
+                f"with gene symbols. Marker gene extraction may fail or produce empty "
+                f"results. Consider re-running sc.tl.rank_genes_groups on the current adata."
+            )
+
     # Validate coordinates with fallback options (case-insensitive matching)
     common_coordinate_keys = [coordinates_key, "X_umap", "X_tsne", "X_pca"]
     found_coordinates_key: str | None = None

From c711577f79d6a49a4e1d726166f809aafad6697b Mon Sep 17 00:00:00 2001
From: Yi Su <90744702+suu-yi@users.noreply.github.com>
Date: Wed, 8 Apr 2026 15:45:32 +0200
Subject: [PATCH 3/6] Restore any_genes_found check in extract_marker_genes

---
 cytetype/preprocessing/extraction.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cytetype/preprocessing/extraction.py b/cytetype/preprocessing/extraction.py
index b6d00e1..6a525cd 100644
--- a/cytetype/preprocessing/extraction.py
+++ b/cytetype/preprocessing/extraction.py
@@ -91,6 +91,10 @@ def extract_marker_genes(
             "could not be matched to adata.var_names. This typically happens "
             "when var_names were changed after rank_genes_groups was run."
         )
+    if not any_genes_found:
+        raise ValueError(
+            "No marker genes found for any group. This could indicate issues with the "
+            "rank_genes_groups analysis or that all groups have insufficient marker genes."
 
     return markers
 

From 573aef5bb934d26f8c92c495abc6961fd5d11048 Mon Sep 17 00:00:00 2001
From: Yi Su <90744702+suu-yi@users.noreply.github.com>
Date: Wed, 8 Apr 2026 16:05:40 +0200
Subject: [PATCH 4/6] Add missing closing parenthesis to ValueError raise

---
 cytetype/preprocessing/extraction.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cytetype/preprocessing/extraction.py b/cytetype/preprocessing/extraction.py
index 6a525cd..41143c8 100644
--- a/cytetype/preprocessing/extraction.py
+++ b/cytetype/preprocessing/extraction.py
@@ -95,6 +95,7 @@ def extract_marker_genes(
         raise ValueError(
             "No marker genes found for any group. This could indicate issues with the "
             "rank_genes_groups analysis or that all groups have insufficient marker genes."
+        )
 
     return markers
 

From a720fa57fefe4264903b685ea19c6fde5595998c Mon Sep 17 00:00:00 2001
From: Yi Su <90744702+suu-yi@users.noreply.github.com>
Date: Thu, 9 Apr 2026 08:35:24 +0200
Subject: [PATCH 5/6] Remove gene-ID-like warning from validate_adata

---
 cytetype/preprocessing/validation.py | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/cytetype/preprocessing/validation.py b/cytetype/preprocessing/validation.py
index 7d8794b..d458848 100644
--- a/cytetype/preprocessing/validation.py
+++ b/cytetype/preprocessing/validation.py
@@ -338,28 +338,6 @@ def validate_adata(
             f"'names' field in `adata.uns['{rank_genes_key}']` is missing or invalid."
         )
 
-    rank_names = adata.uns[rank_genes_key]["names"]
-    try:
-        sample_genes = [
-            str(g)
-            for field in rank_names.dtype.names[:3]
-            for g in rank_names[field][:20]
-        ]
-    except Exception:
-        sample_genes = []
-
-    if sample_genes:
-        id_pct = _id_like_percentage(sample_genes)
-        if id_pct > 50:
-            examples = [g for g in sample_genes if _is_gene_id_like(g)][:3]
-            logger.warning(
-                f"rank_genes_groups results contain gene IDs rather than gene symbols "
-                f"(e.g. {examples}). This typically happens when var_names were Ensembl "
-                f"IDs at the time rank_genes_groups was run but have since been replaced "
-                f"with gene symbols. Marker gene extraction may fail or produce empty "
-                f"results. Consider re-running sc.tl.rank_genes_groups on the current adata."
-            )
-
     # Validate coordinates with fallback options (case-insensitive matching)
     common_coordinate_keys = [coordinates_key, "X_umap", "X_tsne", "X_pca"]
     found_coordinates_key: str | None = None

From 6e42d5687afbfdf6385fb03bcc34597a93336296 Mon Sep 17 00:00:00 2001
From: Yi Su <90744702+suu-yi@users.noreply.github.com>
Date: Thu, 9 Apr 2026 08:35:37 +0200
Subject: [PATCH 6/6] Move any_genes_found check before empty-markers check

---
 cytetype/preprocessing/extraction.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/cytetype/preprocessing/extraction.py b/cytetype/preprocessing/extraction.py
index 41143c8..693a451 100644
--- a/cytetype/preprocessing/extraction.py
+++ b/cytetype/preprocessing/extraction.py
@@ -85,17 +85,18 @@ def extract_marker_genes(
                 f"This could indicate inconsistencies with the marker genes and the genes in adata.var."
             )
 
+    if not any_genes_found:
+        raise ValueError(
+            "No marker genes found for any group. This could indicate issues with the "
+            "rank_genes_groups analysis or that all groups have insufficient marker genes."
+        )
+
     if not any(markers.values()):
         raise ValueError(
             "All marker gene lists are empty. Gene names in rank_genes_groups "
             "could not be matched to adata.var_names. This typically happens "
             "when var_names were changed after rank_genes_groups was run."
         )
-    if not any_genes_found:
-        raise ValueError(
-            "No marker genes found for any group. This could indicate issues with the "
-            "rank_genes_groups analysis or that all groups have insufficient marker genes."
-        )
 
     return markers