diff --git a/bin/analysis/squidpy_entry_point.py b/bin/analysis/squidpy_entry_point.py index 4c7691e..22fd826 100755 --- a/bin/analysis/squidpy_entry_point.py +++ b/bin/analysis/squidpy_entry_point.py @@ -6,41 +6,82 @@ from typing import Iterable, Tuple import anndata -import cv2 import manhole import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import scanpy as sc import squidpy as sq import tifffile as tf from common import Assay from plot_utils import new_plot +import spatialdata +import spatialdata_plot + +from spatialdata.models import Image2DModel, Image3DModel, Labels2DModel, Labels3DModel, PointsModel, ShapesModel, TableModel +from math import ceil, log2 +from aicsimageio import AICSImage +import geopandas +import shapely +import pandas as pd -ome_tiff_pattern = re.compile(r"(?P.*)\.ome\.tiff(f?)$") +desired_pixel_size_for_pyramid = 250 +ome_tiff_pattern = re.compile(r"(?P.*)\.ome\.tiff(f?)$") -def find_ome_tiffs(input_dir: Path) -> Iterable[Path]: - """ - Yields 2-tuples: - [0] full Path to source file - [1] output file Path (source file relative to input_dir) - """ +def find_ome_tiff(input_dir: Path) -> Path: for dirpath_str, _, filenames in walk(input_dir): dirpath = Path(dirpath_str) for filename in filenames: if ome_tiff_pattern.match(filename): src_filepath = dirpath / filename - yield src_filepath - + return src_filepath + +def get_img_spatialdata(img_dir: Path): + print("Loading image data") + image_file = find_ome_tiff(img_dir) + image = AICSImage(image_file) + image_data_squeezed = image.data.squeeze() + print("... done. Original shape:", image.data.shape) + print(f"New shape: {image_data_squeezed.shape}") + + image_scale_factors = (2,) * ceil( + log2(max(image_data_squeezed.shape[1:]) / desired_pixel_size_for_pyramid) + ) + + img_for_sdata = Image2DModel.parse( + data=image_data_squeezed, + dims=['y','x','c'], + scale_factors=image_scale_factors, + ) + + return img_for_sdata + + +def get_shapes_spatialdata(adata:anndata.AnnData): + geo_df = geopandas.GeoDataFrame(index=adata.obs.index) + if 'spatial' in adata.uns:#visium + radius = adata.uns['spatial']['visium']['scalefactors']['spot_diameter_fullres'] / 2 + else:#slideseq + radius = 5 + adata.obsm['spatial'] = adata.obsm['X_spatial'] + radius_series = pd.Series(radius, index=geo_df.index) + coords = adata.obsm['spatial'] + points_list = [shapely.Point(coords[i]) for i in range(len(adata.obs.index))] + points_series = geopandas.GeoSeries(points_list, index=adata.obs.index) + geo_df['geometry'] = points_series + geo_df['radius'] = radius_series + return ShapesModel.parse(geo_df) def main(assay: Assay, h5ad_file: Path, img_dir: Path = None): if assay in {Assay.VISIUM_FF, Assay.SLIDESEQ}: adata = anndata.read(h5ad_file) + + table_for_sdata = TableModel.parse(adata) + shapes_for_sdata = get_shapes_spatialdata(adata) + adata.obsm["spatial"] = adata.obsm["X_spatial"] + if img_dir: - tiff_file = list(find_ome_tiffs(input_dir=img_dir))[0] + tiff_file = find_ome_tiff(input_dir=img_dir) img = tf.imread(fspath(tiff_file)) library_id = list(adata.uns["spatial"].keys())[0] adata.uns["spatial"][library_id]["images"] = {"hires": img} @@ -48,13 +89,26 @@ def main(assay: Assay, h5ad_file: Path, img_dir: Path = None): "tissue_hires_scalef": 1.0, "spot_diameter_fullres": 89, } + img_for_sdata = get_img_spatialdata(img_dir) + + sdata = spatialdata.SpatialData(images={'visium_fullres_img':img_for_sdata}, shapes={'visium':shapes_for_sdata}, table=table_for_sdata) + + sdata.pl.render_images('visium_fullres_img').pl.render_shapes('visium', color='leiden').pl.show() + plt.savefig('spatial_scatter.pdf', bbox_inches='tight') + + else: + sdata = spatialdata.SpatialData(shapes={'slideseq':shapes_for_sdata},table=table_for_sdata) + + output_file_stem_dict = {Assay.VISIUM_FF:"Visium", Assay.SLIDESEQ:"Slideseq"} + output_file_stem = output_file_stem_dict[assay] + sdata.write(f'{output_file_stem}.zarr') sq.gr.spatial_neighbors(adata) sq.gr.nhood_enrichment(adata, cluster_key="leiden") - with new_plot(): - sq.pl.spatial_scatter(adata, color="leiden") - plt.savefig("spatial_scatter.pdf", bbox_inches="tight") +# with new_plot(): +# sq.pl.spatial_scatter(adata, color="leiden") +# plt.savefig("spatial_scatter.pdf", bbox_inches="tight") with new_plot(): sq.pl.nhood_enrichment(adata, cluster_key="leiden") diff --git a/docker/squidpy/Dockerfile b/docker/squidpy/Dockerfile index 3c76eaf..7c7dbf3 100644 --- a/docker/squidpy/Dockerfile +++ b/docker/squidpy/Dockerfile @@ -1,4 +1,4 @@ -FROM ubuntu:20.04 +FROM ubuntu:22.04 ENV DEBIAN_FRONTEND=noninteractive @@ -11,7 +11,6 @@ RUN apt-get update \ flex \ git \ libfreetype6-dev \ - libigraph0-dev \ libxml2-dev \ libtool \ m4 \ diff --git a/docker/squidpy/requirements.txt b/docker/squidpy/requirements.txt index 466ff82..09643f7 100644 --- a/docker/squidpy/requirements.txt +++ b/docker/squidpy/requirements.txt @@ -1,6 +1,13 @@ +anndata==0.10.9 +aicsimageio +dask==2024.12.0 +manhole squidpy +numpy==2.1.3 opencv-python -numpy==1.22.0 -pandas==1.3.0 -manhole~=1.8 +spatialdata +spatialdata-io +spatialdata-plot +squidpy + diff --git a/pipeline.cwl b/pipeline.cwl index e14c13b..3eb3c5f 100644 --- a/pipeline.cwl +++ b/pipeline.cwl @@ -42,6 +42,10 @@ outputs: outputSource: salmon_quantification/raw_count_matrix type: File? label: "Unfiltered count matrix from Alevin, converted to H5AD, with intronic counts as separate columns" + sdata_zarr: + outputSource: squidpy_analysis/sdata_zarr + type: Directory? + label: "SpatialData object serialized in zarr format" genome_build_json: outputSource: salmon_quantification/genome_build_json type: File @@ -193,6 +197,7 @@ steps: source: img_dir out: - squidpy_annotated_h5ad + - sdata_zarr - neighborhood_enrichment_plot - co_occurrence_plot - interaction_matrix_plot diff --git a/steps/squidpy-analysis.cwl b/steps/squidpy-analysis.cwl index dce7d62..c6b17f8 100644 --- a/steps/squidpy-analysis.cwl +++ b/steps/squidpy-analysis.cwl @@ -24,6 +24,10 @@ outputs: type: File? outputBinding: glob: squidpy_annotated.h5ad + sdata_zarr: + type: Directory? + outputBinding: + glob: *.zarr neighborhood_enrichment_plot: type: File? outputBinding: