# Example TOML configuration file for spatial-tk pipeline
#
# Each command has its own section. CLI arguments override config values.
# Use this file to make your pipelines reproducible across runs.
#
# Usage:
#   spatial-tk concat --config example_config.toml --input samples.csv
#   spatial-tk normalize --config example_config.toml --input merged.zarr
#   etc.

[concat]
# CSV manifest listing the samples to merge (columns: sample, path, [metadata...])
input = "samples.csv"
# Destination for the concatenated .zarr file
output = "merged.zarr"
# Fraction of cells to retain, between 0 and 1; the default of 1.0 means no downsampling
downsample = 1.0

[normalize]
# .zarr file to read
input = "merged.zarr"
# .zarr file to write; required unless inplace = true
output = "normalized.zarr"
# Set to true to modify the input file in place rather than creating a new file
inplace = false
# Per-cell minimum number of expressed genes
min_genes = 100
# Per-gene minimum number of expressing cells
min_cells = 3
# How many highly variable genes to select
n_top_genes = 2000
# Whether to generate and save QC plots
save_plots = false

[cluster]
# Normalized .zarr file to read
input = "normalized.zarr"
# .zarr file to write; required unless inplace = true
output = "clustered.zarr"
# Set to true to modify the input file in place rather than creating a new file
inplace = false
# One or more Leiden clustering resolutions, written as a single comma-separated string
leiden_resolution = "0.2,0.5,1.0"
# Whether to generate and save UMAP plots
save_plots = true

[spatial_neighbors]
# NOTE: TOML has no null value, so optional keys that are unset by default are
# shown commented out below; uncomment and fill one in to set it.
# (Assumes the tool treats a missing key as "not set" - confirm against the
# config loader.)
#
# Path to input .zarr file
input = "clustered.zarr"
# Modify the input file in place instead of creating a new file
inplace = false
# Path to output .zarr file (required unless inplace = true)
output = "neighbors.zarr"
# Optional table key from SpatialData.tables (omit to use the first table)
# table_key = "<table name>"
# obsm key containing spatial coordinates
spatial_key = "spatial"
# Optional obs column containing per-cell library ids
# library_key = "<obs column>"
# Optional single-library identifier convenience value
# library_id = "<library id>"
# Coordinate type: "grid" or "generic"; omit to let Squidpy infer
# coord_type = "generic"
# Number of neighbors for generic coordinates
n_neighs = 6
# Radius threshold as scalar or "min,max" string; omit to disable radius filtering
# radius = "<min>,<max>"
# Transform adjacency matrix: "spectral" or "cosine"; omit for no transform
# transform = "spectral"
# Prefix for output graph keys in obsp/uns
key_added = "spatial"

[spatial_cluster]
# NOTE: TOML has no null value, so optional keys that are unset by default are
# shown commented out below; uncomment and fill one in to set it.
# (Assumes the tool treats a missing key as "not set" - confirm against the
# config loader.)
#
# Path to input .zarr file
input = "neighbors.zarr"
# Modify the input file in place instead of creating a new file
inplace = false
# Path to output .zarr file (required unless inplace = true)
output = "spatial_clustered.zarr"
# Optional table key from SpatialData.tables (omit to use the first table)
# table_key = "<table name>"
# obs column containing cell-type labels used in neighborhood composition vectors
# NOTE(review): this name follows the "cell_type_res{resolution}" pattern that the
# [assign] section documents as its default annotation column - confirm that the
# input file here already contains it.
cell_type_key = "cell_type_res0p5"
# Connectivity matrix key in obsp, usually created by spatial_neighbors
connectivities_key = "spatial_connectivities"
# If connectivities_key is missing, compute neighbors on demand with this k
# neighbor_k = <k>
# Spatial key and library key are used only when computing neighbors on demand
spatial_key = "spatial"
# library_key = "<obs column>"
# Output obs column for selected spatial cluster labels
output_key = "spatial_cluster"
# uns key for detailed results
results_key = "spatial_cluster"
# Clustering mode: "kmeans" or "hdbscan"
mode = "kmeans"
# K-means sweep bounds
min_clusters = 2
max_clusters = 20
# Optional override for final selected cluster count
# force_n_clusters = <n>
# Random seed for reproducibility
random_state = 0
# HDBSCAN parameters (used when mode = "hdbscan")
hdbscan_min_cluster_size = 5
# hdbscan_min_samples = <n>
hdbscan_cluster_selection_epsilon = 0.0
hdbscan_metric = "euclidean"
hdbscan_allow_single_cluster = false
# Include each focal cell in its own neighborhood vector
include_self = true
# Normalize neighborhood vectors to cell-type proportions
normalize_composition = true

[quantitate]
# NOTE: TOML has no null value, so optional keys that are unset by default are
# shown commented out below; uncomment and fill one in to set it.
# (Assumes the tool treats a missing key as "not set" - confirm against the
# config loader.)
#
# Path to input clustered .zarr file
input = "clustered.zarr"
# Modify the input file in place instead of creating a new file
inplace = false
# Path to output .zarr file (required unless inplace = true)
output = "scored.zarr"
# Path to CSV file with marker genes (columns: cell_type, gene)
# At least one of markers or preset_resources is required
markers = "markers.csv"
# Key suffix for custom marker scores (stored as obsm['score_mlm_<score_key>'])
score_key = "custom"
# Scoring method: "mlm" (default) or "ulm"
method = "mlm"
# Minimum number of targets per source for decoupler
tmin = 2
# Comma-separated built-in resources to score: panglao, hallmark, collectri, dorothea, progeny
# Leave commented out to skip preset scoring
# preset_resources = "panglao,hallmark"
# Minimum sensitivity for PanglaoDB markers (used when panglao is in preset_resources)
panglao_min_sensitivity = 0.5
# Only use canonical PanglaoDB markers
panglao_canonical_only = true
# Optional cell filter: "column==value" (e.g. "cell_type==Fibroblast")
# If omitted, all cells are scored
# filter_obs = "cell_type==Fibroblast"
# Generate and save enrichment heatmap plots
save_plots = false

[assign]
# NOTE: TOML has no null value, so optional keys that are unset by default are
# shown commented out below; uncomment and fill one in to set it.
# (Assumes the tool treats a missing key as "not set" - confirm against the
# config loader.)
#
# Path to input scored .zarr file (produced by quantitate)
input = "scored.zarr"
# Modify the input file in place instead of creating a new file
inplace = false
# Path to output .zarr file (required unless inplace = true)
output = "annotated.zarr"
# Full obsm key of the score matrix to use (must match quantitate output)
# e.g. "score_mlm_custom" or "score_mlm_PanglaoDB"
score_key = "score_mlm_custom"
# Cluster column key to assign (e.g. "leiden_res0p5")
# If omitted, all leiden_res* columns are used
# cluster_key = "leiden_res0p5"
# obs column name for cell type labels; if omitted, defaults to "cell_type_res{resolution}"
# annotation_key = "<obs column>"
# Assignment strategy: "top_positive" (default)
strategy = "top_positive"
# Run differential expression per cluster key
run_de = true
# Generate and save annotation plots
save_plots = true

[differential]
# NOTE: TOML has no null value, so optional keys that are unset by default are
# shown commented out below; uncomment and fill one in to set it.
# (Assumes the tool treats a missing key as "not set" - confirm against the
# config loader.)
#
# Path to input .zarr file with annotations
input = "annotated.zarr"
# Directory to save differential analysis results
output_dir = "results/"
# Column in obs to group by for differential analysis
# (e.g., "status", "cell_type", or "leiden_res0p5")
groupby = "leiden_res0p5"
# Comma-separated list of exactly 2 groups to compare (Mode A)
# E.g., "HIV,NEG". If omitted, finds markers for all groups (Mode B)
# compare_groups = "HIV,NEG"
# Optional obsm layer to use for enrichment-based differential analysis
# (e.g., "score_mlm_PanglaoDB")
# obsm_layer = "score_mlm_PanglaoDB"
# Statistical test method for gene expression DE
# Options: "wilcoxon", "t-test", "logreg"
method = "wilcoxon"
# Layer to use for gene expression (omit to use .X)
# layer = "<layer name>"
# Generate and save differential analysis plots
save_plots = false
# Number of top genes to save per group
n_genes = 100