From e57fbd0c57344f469d9b268b48e7659161370315 Mon Sep 17 00:00:00 2001 From: Sean Date: Fri, 25 Jul 2025 10:29:25 -0400 Subject: [PATCH 1/5] Adding zarr backed SpatialData as output, updating squidpy container --- docker/squidpy/Dockerfile | 3 +-- docker/squidpy/requirements.txt | 11 ++++++++--- pipeline.cwl | 5 +++++ steps/squidpy-analysis.cwl | 4 ++++ 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/docker/squidpy/Dockerfile b/docker/squidpy/Dockerfile index 3c76eaf..7c7dbf3 100644 --- a/docker/squidpy/Dockerfile +++ b/docker/squidpy/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 +FROM ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive @@ -11,7 +11,6 @@ RUN apt-get update \ flex \ git \ libfreetype6-dev \ - libigraph0-dev \ libxml2-dev \ libtool \ m4 \ diff --git a/docker/squidpy/requirements.txt b/docker/squidpy/requirements.txt index 466ff82..daebd62 100644 --- a/docker/squidpy/requirements.txt +++ b/docker/squidpy/requirements.txt @@ -1,6 +1,11 @@ +anndata==0.10.9 +dask==2024.12.0 squidpy +numpy==2.1.3 opencv-python -numpy==1.22.0 -pandas==1.3.0 -manhole~=1.8 +spatialdata +spatialdata-io +spatialdata-plot +squidpy + diff --git a/pipeline.cwl b/pipeline.cwl index e14c13b..3eb3c5f 100644 --- a/pipeline.cwl +++ b/pipeline.cwl @@ -42,6 +42,10 @@ outputs: outputSource: salmon_quantification/raw_count_matrix type: File? label: "Unfiltered count matrix from Alevin, converted to H5AD, with intronic counts as separate columns" + sdata_zarr: + outputSource: squidpy_analysis/sdata_zarr + type: Directory? + label: "SpatialData object serialized in zarr format" genome_build_json: outputSource: salmon_quantification/genome_build_json type: File @@ -193,6 +197,7 @@ steps: source: img_dir out: - squidpy_annotated_h5ad + - sdata_zarr - neighborhood_enrichment_plot - co_occurrence_plot - interaction_matrix_plot diff --git a/steps/squidpy-analysis.cwl b/steps/squidpy-analysis.cwl index dce7d62..38be786 100644 --- a/steps/squidpy-analysis.cwl +++ b/steps/squidpy-analysis.cwl @@ -24,6 +24,10 @@ outputs: type: File? outputBinding: glob: squidpy_annotated.h5ad + sdata_zarr: + type: Directory? + outputBinding: + glob: Visium.zarr neighborhood_enrichment_plot: type: File? outputBinding: From 8a8414748f940d0db3a1a3a4402d9539720c4984 Mon Sep 17 00:00:00 2001 From: Sean Date: Wed, 6 Aug 2025 09:48:33 -0400 Subject: [PATCH 2/5] Generating spatialdata object for Visium and Slideseq data --- bin/analysis/squidpy_entry_point.py | 71 +++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 14 deletions(-) diff --git a/bin/analysis/squidpy_entry_point.py b/bin/analysis/squidpy_entry_point.py index 4c7691e..c20303b 100755 --- a/bin/analysis/squidpy_entry_point.py +++ b/bin/analysis/squidpy_entry_point.py @@ -6,41 +6,76 @@ from typing import Iterable, Tuple import anndata -import cv2 import manhole import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import scanpy as sc import squidpy as sq import tifffile as tf from common import Assay from plot_utils import new_plot +import spatialdata -ome_tiff_pattern = re.compile(r"(?P.*)\.ome\.tiff(f?)$") +from spatialdata.models import Image2DModel, Image3DModel, Labels2DModel, Labels3DModel, PointsModel, TableModel +from math import ceil, log2 +from aicsimageio import AICSImage +import geopandas +import shapely +import pandas as pd +desired_pixel_size_for_pyramid = 250 -def find_ome_tiffs(input_dir: Path) -> Iterable[Path]: - """ - Yields 2-tuples: - [0] full Path to source file - [1] output file Path (source file relative to input_dir) - """ +ome_tiff_pattern = re.compile(r"(?P.*)\.ome\.tiff(f?)$") + +def find_ome_tiff(input_dir: Path) -> Path: for dirpath_str, _, filenames in walk(input_dir): dirpath = Path(dirpath_str) for filename in filenames: if ome_tiff_pattern.match(filename): src_filepath = dirpath / filename - yield src_filepath - + return src_filepath + +def get_img_spatialdata(img_dir: Path): + print("Loading image data") + image_file = find_ome_tiff(img_dir) + image = AICSImage(image_file) + image_data_squeezed = image.data.squeeze() + print("... done. Original shape:", image.data.shape) + + image_scale_factors = (2,) * ceil( + log2(max(image_data_squeezed.shape[1:]) / desired_pixel_size_for_pyramid) + ) + + img_for_sdata = Image2DModel.parse( + data=image_data_squeezed, + c_coords=image.channel_names, + scale_factors=image_scale_factors, + ) + + return img_for_sdata + + +def get_shapes_spatialdata(adata:anndata.AnnData)->geopandas.GeoDataFrame: + geo_df = geopandas.GeoDataFrame(index=adata.obs.index) + radius = adata.uns['spatial']['visium']['scalefactors']['spot_diameter_fullres'] / 2 + radius_series = pd.Series(radius, index=geo_df.index) + coords = adata.obsm['spatial'] + points_list = [shapely.Point(coords[i]) for i in range(len(adata.obs.index))] + points_series = geopandas.Series(points_list, index=adata.obs.index) + geo_df['geometry'] = points_series + geo_df['radius'] = radius_series + return geo_df def main(assay: Assay, h5ad_file: Path, img_dir: Path = None): if assay in {Assay.VISIUM_FF, Assay.SLIDESEQ}: adata = anndata.read(h5ad_file) + + table_for_sdata = TableModel.from_adata(adata) + shapes_for_sdata = get_shapes_spatialdata(adata) + adata.obsm["spatial"] = adata.obsm["X_spatial"] + if img_dir: - tiff_file = list(find_ome_tiffs(input_dir=img_dir))[0] + tiff_file = find_ome_tiff(input_dir=img_dir) img = tf.imread(fspath(tiff_file)) library_id = list(adata.uns["spatial"].keys())[0] adata.uns["spatial"][library_id]["images"] = {"hires": img} @@ -48,6 +83,14 @@ def main(assay: Assay, h5ad_file: Path, img_dir: Path = None): "tissue_hires_scalef": 1.0, "spot_diameter_fullres": 89, } + img_for_sdata = get_img_spatialdata(img_dir) + + sdata = spatialdata.SpatialData(images={'visium_fullres_img':img_for_sdata}, shapes={'visium':shapes_for_sdata}, table=table_for_sdata) + + else: + sdata = spatialdata.SpatialData(shapes={'visium':shapes_for_sdata},table=table_for_sdata) + + sdata.write('Visium.zarr') sq.gr.spatial_neighbors(adata) sq.gr.nhood_enrichment(adata, cluster_key="leiden") From 6a5cc7cae7f1287d2164b6e3befafa8a6b09b3e1 Mon Sep 17 00:00:00 2001 From: Sean Date: Fri, 8 Aug 2025 08:53:26 -0400 Subject: [PATCH 3/5] Fixes to spatialdata integration --- bin/analysis/squidpy_entry_point.py | 13 +++++++------ docker/squidpy/requirements.txt | 2 ++ 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/bin/analysis/squidpy_entry_point.py b/bin/analysis/squidpy_entry_point.py index c20303b..cbe35cc 100755 --- a/bin/analysis/squidpy_entry_point.py +++ b/bin/analysis/squidpy_entry_point.py @@ -15,7 +15,7 @@ from plot_utils import new_plot import spatialdata -from spatialdata.models import Image2DModel, Image3DModel, Labels2DModel, Labels3DModel, PointsModel, TableModel +from spatialdata.models import Image2DModel, Image3DModel, Labels2DModel, Labels3DModel, PointsModel, ShapesModel, TableModel from math import ceil, log2 from aicsimageio import AICSImage import geopandas @@ -40,6 +40,7 @@ def get_img_spatialdata(img_dir: Path): image = AICSImage(image_file) image_data_squeezed = image.data.squeeze() print("... done. Original shape:", image.data.shape) + print(f"New shape: {image_data_squeezed.shape}") image_scale_factors = (2,) * ceil( log2(max(image_data_squeezed.shape[1:]) / desired_pixel_size_for_pyramid) @@ -47,29 +48,29 @@ def get_img_spatialdata(img_dir: Path): img_for_sdata = Image2DModel.parse( data=image_data_squeezed, - c_coords=image.channel_names, + dims=['x','y','c'], scale_factors=image_scale_factors, ) return img_for_sdata -def get_shapes_spatialdata(adata:anndata.AnnData)->geopandas.GeoDataFrame: +def get_shapes_spatialdata(adata:anndata.AnnData): geo_df = geopandas.GeoDataFrame(index=adata.obs.index) radius = adata.uns['spatial']['visium']['scalefactors']['spot_diameter_fullres'] / 2 radius_series = pd.Series(radius, index=geo_df.index) coords = adata.obsm['spatial'] points_list = [shapely.Point(coords[i]) for i in range(len(adata.obs.index))] - points_series = geopandas.Series(points_list, index=adata.obs.index) + points_series = geopandas.GeoSeries(points_list, index=adata.obs.index) geo_df['geometry'] = points_series geo_df['radius'] = radius_series - return geo_df + return ShapesModel.parse(geo_df) def main(assay: Assay, h5ad_file: Path, img_dir: Path = None): if assay in {Assay.VISIUM_FF, Assay.SLIDESEQ}: adata = anndata.read(h5ad_file) - table_for_sdata = TableModel.from_adata(adata) + table_for_sdata = TableModel.parse(adata) shapes_for_sdata = get_shapes_spatialdata(adata) adata.obsm["spatial"] = adata.obsm["X_spatial"] diff --git a/docker/squidpy/requirements.txt b/docker/squidpy/requirements.txt index daebd62..09643f7 100644 --- a/docker/squidpy/requirements.txt +++ b/docker/squidpy/requirements.txt @@ -1,5 +1,7 @@ anndata==0.10.9 +aicsimageio dask==2024.12.0 +manhole squidpy numpy==2.1.3 opencv-python From 17883598c47d113088b9700cbe343244fba5c935 Mon Sep 17 00:00:00 2001 From: Sean Date: Wed, 20 Aug 2025 13:11:48 -0400 Subject: [PATCH 4/5] Mod to pipeline to support slideseq output --- bin/analysis/squidpy_entry_point.py | 18 ++++++++++++------ steps/squidpy-analysis.cwl | 2 +- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/bin/analysis/squidpy_entry_point.py b/bin/analysis/squidpy_entry_point.py index cbe35cc..9b2c536 100755 --- a/bin/analysis/squidpy_entry_point.py +++ b/bin/analysis/squidpy_entry_point.py @@ -14,6 +14,7 @@ from common import Assay from plot_utils import new_plot import spatialdata +import spatialdata_plot from spatialdata.models import Image2DModel, Image3DModel, Labels2DModel, Labels3DModel, PointsModel, ShapesModel, TableModel from math import ceil, log2 @@ -48,7 +49,7 @@ def get_img_spatialdata(img_dir: Path): img_for_sdata = Image2DModel.parse( data=image_data_squeezed, - dims=['x','y','c'], + dims=['y','x','c'], scale_factors=image_scale_factors, ) @@ -88,17 +89,22 @@ def main(assay: Assay, h5ad_file: Path, img_dir: Path = None): sdata = spatialdata.SpatialData(images={'visium_fullres_img':img_for_sdata}, shapes={'visium':shapes_for_sdata}, table=table_for_sdata) + sdata.pl.render_images('visium_fullres_img').pl.render_shapes('visium', color='leiden').pl.show() + plt.savefig('spatial_scatter.pdf', bbox_inches='tight') + else: - sdata = spatialdata.SpatialData(shapes={'visium':shapes_for_sdata},table=table_for_sdata) + sdata = spatialdata.SpatialData(shapes={'slideseq':shapes_for_sdata},table=table_for_sdata) - sdata.write('Visium.zarr') + output_file_stem_dict = {Assay.VISIUM_FF:"Visium", Assay.SLIDESEQ:"Slideseq"} + output_file_stem = output_file_stem_dict[assay] + sdata.write(f'{output_file_stem}.zarr') sq.gr.spatial_neighbors(adata) sq.gr.nhood_enrichment(adata, cluster_key="leiden") - with new_plot(): - sq.pl.spatial_scatter(adata, color="leiden") - plt.savefig("spatial_scatter.pdf", bbox_inches="tight") +# with new_plot(): +# sq.pl.spatial_scatter(adata, color="leiden") +# plt.savefig("spatial_scatter.pdf", bbox_inches="tight") with new_plot(): sq.pl.nhood_enrichment(adata, cluster_key="leiden") diff --git a/steps/squidpy-analysis.cwl b/steps/squidpy-analysis.cwl index 38be786..c6b17f8 100644 --- a/steps/squidpy-analysis.cwl +++ b/steps/squidpy-analysis.cwl @@ -27,7 +27,7 @@ outputs: sdata_zarr: type: Directory? outputBinding: - glob: Visium.zarr + glob: *.zarr neighborhood_enrichment_plot: type: File? outputBinding: From 4a62bbf838b48441282bff3a692394b9429af9c3 Mon Sep 17 00:00:00 2001 From: Sean Date: Mon, 25 Aug 2025 09:05:56 -0400 Subject: [PATCH 5/5] Adding handling for slideseq data --- bin/analysis/squidpy_entry_point.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/analysis/squidpy_entry_point.py b/bin/analysis/squidpy_entry_point.py index 9b2c536..22fd826 100755 --- a/bin/analysis/squidpy_entry_point.py +++ b/bin/analysis/squidpy_entry_point.py @@ -58,7 +58,11 @@ def get_img_spatialdata(img_dir: Path): def get_shapes_spatialdata(adata:anndata.AnnData): geo_df = geopandas.GeoDataFrame(index=adata.obs.index) - radius = adata.uns['spatial']['visium']['scalefactors']['spot_diameter_fullres'] / 2 + if 'spatial' in adata.uns:#visium + radius = adata.uns['spatial']['visium']['scalefactors']['spot_diameter_fullres'] / 2 + else:#slideseq + radius = 5 + adata.obsm['spatial'] = adata.obsm['X_spatial'] radius_series = pd.Series(radius, index=geo_df.index) coords = adata.obsm['spatial'] points_list = [shapely.Point(coords[i]) for i in range(len(adata.obs.index))]