From 9aaad7842032b555d7371d81f9eb8a5f0ec63862 Mon Sep 17 00:00:00 2001 From: Yi Su <90744702+suu-yi@users.noreply.github.com> Date: Wed, 8 Apr 2026 14:50:51 +0200 Subject: [PATCH 1/6] check genes lost from mapping --- cytetype/preprocessing/extraction.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/cytetype/preprocessing/extraction.py b/cytetype/preprocessing/extraction.py index 44f9c9c..b6d00e1 100644 --- a/cytetype/preprocessing/extraction.py +++ b/cytetype/preprocessing/extraction.py @@ -78,10 +78,18 @@ def extract_marker_genes( gene_ids_to_name[gene] for gene in top_genes if gene in gene_ids_to_name ] - if not any_genes_found: + lost_genes = len(top_genes) - len(markers[cluster_id]) + if lost_genes > 0: + logger.warning( + f"Number of lost genes ({lost_genes}) for group '{group_name}' (cluster '{cluster_id}'). \n" + f"This could indicate inconsistencies with the marker genes and the genes in adata.var." + ) + + if not any(markers.values()): raise ValueError( - "No marker genes found for any group. This could indicate issues with the " - "rank_genes_groups analysis or that all groups have insufficient marker genes." + "All marker gene lists are empty. Gene names in rank_genes_groups " + "could not be matched to adata.var_names. This typically happens " + "when var_names were changed after rank_genes_groups was run." ) return markers From 29caf56d7efb67a6a1a304ec4ed314ed51d3b03d Mon Sep 17 00:00:00 2001 From: Yi Su <90744702+suu-yi@users.noreply.github.com> Date: Wed, 8 Apr 2026 14:51:11 +0200 Subject: [PATCH 2/6] marker genes id like check --- cytetype/preprocessing/validation.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/cytetype/preprocessing/validation.py b/cytetype/preprocessing/validation.py index d458848..7d8794b 100644 --- a/cytetype/preprocessing/validation.py +++ b/cytetype/preprocessing/validation.py @@ -338,6 +338,28 @@ def validate_adata( f"'names' field in `adata.uns['{rank_genes_key}']` is missing or invalid." ) + rank_names = adata.uns[rank_genes_key]["names"] + try: + sample_genes = [ + str(g) + for field in rank_names.dtype.names[:3] + for g in rank_names[field][:20] + ] + except Exception: + sample_genes = [] + + if sample_genes: + id_pct = _id_like_percentage(sample_genes) + if id_pct > 50: + examples = [g for g in sample_genes if _is_gene_id_like(g)][:3] + logger.warning( + f"rank_genes_groups results contain gene IDs rather than gene symbols " + f"(e.g. {examples}). This typically happens when var_names were Ensembl " + f"IDs at the time rank_genes_groups was run but have since been replaced " + f"with gene symbols. Marker gene extraction may fail or produce empty " + f"results. Consider re-running sc.tl.rank_genes_groups on the current adata." + ) + # Validate coordinates with fallback options (case-insensitive matching) common_coordinate_keys = [coordinates_key, "X_umap", "X_tsne", "X_pca"] found_coordinates_key: str | None = None From c711577f79d6a49a4e1d726166f809aafad6697b Mon Sep 17 00:00:00 2001 From: Yi Su <90744702+suu-yi@users.noreply.github.com> Date: Wed, 8 Apr 2026 15:45:32 +0200 Subject: [PATCH 3/6] Update extraction.py --- cytetype/preprocessing/extraction.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cytetype/preprocessing/extraction.py b/cytetype/preprocessing/extraction.py index b6d00e1..6a525cd 100644 --- a/cytetype/preprocessing/extraction.py +++ b/cytetype/preprocessing/extraction.py @@ -91,6 +91,10 @@ def extract_marker_genes( "could not be matched to adata.var_names. This typically happens " "when var_names were changed after rank_genes_groups was run." ) + if not any_genes_found: + raise ValueError( + "No marker genes found for any group. This could indicate issues with the " + "rank_genes_groups analysis or that all groups have insufficient marker genes." return markers From 573aef5bb934d26f8c92c495abc6961fd5d11048 Mon Sep 17 00:00:00 2001 From: Yi Su <90744702+suu-yi@users.noreply.github.com> Date: Wed, 8 Apr 2026 16:05:40 +0200 Subject: [PATCH 4/6] bracket --- cytetype/preprocessing/extraction.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cytetype/preprocessing/extraction.py b/cytetype/preprocessing/extraction.py index 6a525cd..41143c8 100644 --- a/cytetype/preprocessing/extraction.py +++ b/cytetype/preprocessing/extraction.py @@ -95,6 +95,7 @@ def extract_marker_genes( raise ValueError( "No marker genes found for any group. This could indicate issues with the " "rank_genes_groups analysis or that all groups have insufficient marker genes." + ) return markers From a720fa57fefe4264903b685ea19c6fde5595998c Mon Sep 17 00:00:00 2001 From: Yi Su <90744702+suu-yi@users.noreply.github.com> Date: Thu, 9 Apr 2026 08:35:24 +0200 Subject: [PATCH 5/6] Update validation.py --- cytetype/preprocessing/validation.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/cytetype/preprocessing/validation.py b/cytetype/preprocessing/validation.py index 7d8794b..d458848 100644 --- a/cytetype/preprocessing/validation.py +++ b/cytetype/preprocessing/validation.py @@ -338,28 +338,6 @@ def validate_adata( f"'names' field in `adata.uns['{rank_genes_key}']` is missing or invalid." ) - rank_names = adata.uns[rank_genes_key]["names"] - try: - sample_genes = [ - str(g) - for field in rank_names.dtype.names[:3] - for g in rank_names[field][:20] - ] - except Exception: - sample_genes = [] - - if sample_genes: - id_pct = _id_like_percentage(sample_genes) - if id_pct > 50: - examples = [g for g in sample_genes if _is_gene_id_like(g)][:3] - logger.warning( - f"rank_genes_groups results contain gene IDs rather than gene symbols " - f"(e.g. {examples}). This typically happens when var_names were Ensembl " - f"IDs at the time rank_genes_groups was run but have since been replaced " - f"with gene symbols. Marker gene extraction may fail or produce empty " - f"results. Consider re-running sc.tl.rank_genes_groups on the current adata." - ) - # Validate coordinates with fallback options (case-insensitive matching) common_coordinate_keys = [coordinates_key, "X_umap", "X_tsne", "X_pca"] found_coordinates_key: str | None = None From 6e42d5687afbfdf6385fb03bcc34597a93336296 Mon Sep 17 00:00:00 2001 From: Yi Su <90744702+suu-yi@users.noreply.github.com> Date: Thu, 9 Apr 2026 08:35:37 +0200 Subject: [PATCH 6/6] Update extraction.py --- cytetype/preprocessing/extraction.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/cytetype/preprocessing/extraction.py b/cytetype/preprocessing/extraction.py index 41143c8..693a451 100644 --- a/cytetype/preprocessing/extraction.py +++ b/cytetype/preprocessing/extraction.py @@ -85,17 +85,18 @@ def extract_marker_genes( f"This could indicate inconsistencies with the marker genes and the genes in adata.var." ) + if not any_genes_found: + raise ValueError( + "No marker genes found for any group. This could indicate issues with the " + "rank_genes_groups analysis or that all groups have insufficient marker genes." + ) + if not any(markers.values()): raise ValueError( "All marker gene lists are empty. Gene names in rank_genes_groups " "could not be matched to adata.var_names. This typically happens " "when var_names were changed after rank_genes_groups was run." ) - if not any_genes_found: - raise ValueError( - "No marker genes found for any group. This could indicate issues with the " - "rank_genes_groups analysis or that all groups have insufficient marker genes." - ) return markers