Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
700f916
chore: dependencies
Adames4 Jan 25, 2026
65e86db
feat: quality control
Adames4 Jan 25, 2026
59965a4
feat: add dataset configuration files for ftn, ikem, and knl_patos
Adames4 Jan 25, 2026
234d611
fix: output dir
Adames4 Jan 25, 2026
4930434
fix: typo
Adames4 Jan 25, 2026
180afe7
chore: Merge branch 'feature/tissue-masks' into feature/tiling
Adames4 Jan 26, 2026
456d4ae
chore: add tiling libs
Adames4 Jan 27, 2026
e88bfd8
fix: naming
Adames4 Jan 27, 2026
b91d36d
feat: confs
Adames4 Jan 27, 2026
413db4f
feat: tiling
Adames4 Jan 27, 2026
f5b4045
fix: confs
Adames4 Jan 27, 2026
2a578d1
fix: typo
Adames4 Jan 27, 2026
9262f3e
fix: typo
Adames4 Jan 27, 2026
b852be9
fix: typo
Adames4 Jan 27, 2026
bd8ca9f
fix: dataset index
Adames4 Jan 27, 2026
08d3320
feat: update tiling to latest ratiopath
Adames4 Jan 27, 2026
3ab6d5a
fix: use tile overlay overlap as udfexpr
Adames4 Jan 27, 2026
06e957e
chore: ratiopath from github
Adames4 Jan 28, 2026
de0fd75
fix: WIP
Adames4 Jan 28, 2026
e56216b
fix: None in overlap
Adames4 Jan 28, 2026
95cc81c
feat: confs
Adames4 Jan 28, 2026
754698f
chore: dependencies
Adames4 Jan 28, 2026
178f294
feat: tile = stride
Adames4 Jan 31, 2026
e4d2499
fix: typo
Adames4 Jan 31, 2026
acfbf19
fix: glob over changing dir
Adames4 Jan 31, 2026
659f505
fix: finish the run
Adames4 Jan 31, 2026
45ac3b3
fix: rever last commit
Adames4 Jan 31, 2026
3c7a5fc
feat: conf
Adames4 Jan 31, 2026
774f24a
fix: splits
Adames4 Jan 31, 2026
076ba93
feat: tweaking resources
Adames4 Jan 31, 2026
2788656
feat: confs
Adames4 Jan 31, 2026
25bec6e
chore: dependecies
Adames4 Feb 1, 2026
5faa224
fix: add paths
Adames4 Feb 1, 2026
eb96f56
fix: conf
Adames4 Feb 5, 2026
89a4582
fix: typo
Adames4 Feb 5, 2026
18c556d
chore: dependencies
Adames4 Feb 5, 2026
35cfb5c
fix: group splitting
Adames4 Feb 6, 2026
c98608e
chore: Merge branch 'feature/dataset' into feature/quality-control
Adames4 Feb 11, 2026
bc71668
feat: configs
Adames4 Feb 11, 2026
6647d0d
chore: Merge branch 'feature/tissue-masks' into feature/tiling
Adames4 Feb 11, 2026
2dfb32b
chore: Merge branch 'feature/quality-control' into feature/tiling
Adames4 Feb 11, 2026
30d4494
feat: configs
Adames4 Feb 12, 2026
88eb5dc
fix: conf
Adames4 Feb 12, 2026
4269a3a
fix: PR
Adames4 Feb 12, 2026
a9615ed
fix: repo
Adames4 Feb 12, 2026
3b7f95c
chore: Merge branch 'feature/quality-control' into feature/tiling
Adames4 Feb 12, 2026
659910e
fix: repo
Adames4 Feb 12, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions configs/dataset/processed_w_masks/ftn.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Extends the processed ftn dataset config (merged in place via `@_here_`)
# with the MLflow artifact URIs of its tissue masks and QC masks.
defaults:
- /dataset/processed/ftn@_here_
- _self_

# NOTE(review): both URIs point at specific MLflow runs and are still marked
# TODO; they must be replaced with the final run URIs before this is relied on.
tissue_mask_uri: mlflow-artifacts:/86/04778b10de254572b69ce0a101c1eee4/artifacts/tissue_masks # TODO update URI
qc_mask_uri: mlflow-artifacts:/86/c8edfb2541e84b44b1a28be3540c1a35/artifacts # TODO update URI
Comment on lines +5 to +6

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The artifact URIs are hardcoded and marked with a TODO. This should be updated with the final URIs before merging. For better maintainability, consider if these could be passed in via a more dynamic configuration method rather than being hardcoded in multiple files.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Waiting for #3 , #4

Comment on lines +5 to +6
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dataset config hard-codes MLflow artifact URIs and leaves a # TODO update URI note. If these URIs are not final/stable, the tiling pipeline will fail or use incorrect artifacts. Please either update them to the final run URIs, or switch to a placeholder/override-based approach so the committed default config is usable.

Suggested change
tissue_mask_uri: mlflow-artifacts:/86/04778b10de254572b69ce0a101c1eee4/artifacts/tissue_masks # TODO update URI
qc_mask_uri: mlflow-artifacts:/86/c8edfb2541e84b44b1a28be3540c1a35/artifacts # TODO update URI
tissue_mask_uri: OVERRIDE_ME_TISSUE_MASK_URI
qc_mask_uri: OVERRIDE_ME_QC_MASK_URI

Copilot uses AI. Check for mistakes.
6 changes: 6 additions & 0 deletions configs/dataset/processed_w_masks/ikem.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Extends the processed ikem dataset config (merged in place via `@_here_`)
# with the MLflow artifact URIs of its tissue masks and QC masks.
defaults:
- /dataset/processed/ikem@_here_
- _self_

# NOTE(review): both URIs point at specific MLflow runs and are still marked
# TODO; they must be replaced with the final run URIs before this is relied on.
tissue_mask_uri: mlflow-artifacts:/86/13359cdd5d1a47ddabc352b9aa0d7635/artifacts/tissue_masks # TODO update URI
qc_mask_uri: mlflow-artifacts:/86/98443fe2b67445d5a56598bff15b7f27/artifacts # TODO update URI
Comment on lines +5 to +6

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Similar to other dataset configurations, the artifact URIs here are hardcoded and marked with a TODO. Please update them with the final URIs. Centralizing this configuration could prevent having to update multiple files.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Waiting for #3 , #4

6 changes: 6 additions & 0 deletions configs/dataset/processed_w_masks/knl_patos.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Extends the processed knl_patos dataset config (merged in place via `@_here_`)
# with the MLflow artifact URIs of its tissue masks and QC masks.
defaults:
- /dataset/processed/knl_patos@_here_
- _self_

# NOTE(review): both URIs point at specific MLflow runs and are still marked
# TODO; they must be replaced with the final run URIs before this is relied on.
tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks # TODO update URI
qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts # TODO update URI
Comment on lines +5 to +6

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The artifact URIs are hardcoded with a TODO comment. Please ensure these are updated to the correct, final URIs. Having these values hardcoded in multiple files can be error-prone; a centralized configuration would be more robust.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Waiting for #3 , #4

Comment on lines +5 to +6
Copy link

Copilot AI Feb 12, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dataset config hard-codes MLflow artifact URIs and leaves a # TODO update URI note. If these URIs are not final/stable, the tiling pipeline will fail or use incorrect artifacts. Please either update them to the final run URIs, or switch to a placeholder/override-based approach so the committed default config is usable.

Suggested change
tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks # TODO update URI
qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts # TODO update URI
tissue_mask_uri: ${oc.env:TISSUE_MASK_URI,}
qc_mask_uri: ${oc.env:QC_MASK_URI,}

Copilot uses AI. Check for mistakes.
Empty file removed configs/experiment/.gitkeep
Empty file.
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/ftn_0_320px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: ftn dataset (config with mask URIs, placed under the
# `dataset` key), 320 px tiles at 0.17 mpp.

defaults:
- /dataset/processed_w_masks/ftn@dataset
- _self_

mpp: 0.17 # level 0
tile_extent: 320
stride: 320 # equal to tile_extent

# Split fractions; the three values add up to 1.
splits:
train: 0.7
test_preliminary: 0.15
test_final: 0.15
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/ftn_0_75um.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: ftn dataset (config with mask URIs, placed under the
# `dataset` key), tiles sized for roughly 75 µm at 0.17 mpp.

defaults:
- /dataset/processed_w_masks/ftn@dataset
- _self_

mpp: 0.17 # level 0
tile_extent: 430 # NOTE(review): 75 / 0.17 ≈ 441, not 430; 430 px × 0.17 mpp ≈ 73 µm — confirm the intended extent (or the exact mpp)
stride: 430 # equal to tile_extent

# Split fractions; the three values add up to 1.
splits:
train: 0.7
test_preliminary: 0.15
test_final: 0.15
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/ftn_1_224px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: ftn dataset (config with mask URIs, placed under the
# `dataset` key), 224 px tiles at 0.52 mpp.

defaults:
- /dataset/processed_w_masks/ftn@dataset
- _self_

mpp: 0.52 # level 1
tile_extent: 224
stride: 112 # half of tile_extent

# Split fractions; the three values add up to 1.
splits:
train: 0.7
test_preliminary: 0.15
test_final: 0.15
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/ftn_2_224px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: ftn dataset (config with mask URIs, placed under the
# `dataset` key), 224 px tiles at 1.55 mpp.

defaults:
- /dataset/processed_w_masks/ftn@dataset
- _self_

mpp: 1.55 # level 2
tile_extent: 224
stride: 112 # half of tile_extent

# Split fractions; the three values add up to 1.
splits:
train: 0.7
test_preliminary: 0.15
test_final: 0.15
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/ikem_0_320px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: ikem dataset (config with mask URIs, placed under the
# `dataset` key), 320 px tiles at 0.17 mpp.

defaults:
- /dataset/processed_w_masks/ikem@dataset
- _self_

mpp: 0.17 # level 0
tile_extent: 320
stride: 320 # equal to tile_extent

# Split fractions; the three values add up to 1.
splits:
train: 0.7
test_preliminary: 0.15
test_final: 0.15
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/ikem_0_75um.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: ikem dataset (config with mask URIs, placed under the
# `dataset` key), tiles sized for roughly 75 µm at 0.17 mpp.

defaults:
- /dataset/processed_w_masks/ikem@dataset
- _self_

mpp: 0.17 # level 0
tile_extent: 430 # NOTE(review): 75 / 0.17 ≈ 441, not 430; 430 px × 0.17 mpp ≈ 73 µm — confirm the intended extent (or the exact mpp)
stride: 430 # equal to tile_extent

# Split fractions; the three values add up to 1.
splits:
train: 0.7
test_preliminary: 0.15
test_final: 0.15
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/ikem_1_224px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: ikem dataset (config with mask URIs, placed under the
# `dataset` key), 224 px tiles at 0.52 mpp.

defaults:
- /dataset/processed_w_masks/ikem@dataset
- _self_

mpp: 0.52 # level 1
tile_extent: 224
stride: 112 # half of tile_extent

# Split fractions; the three values add up to 1.
splits:
train: 0.7
test_preliminary: 0.15
test_final: 0.15
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/ikem_2_224px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: ikem dataset (config with mask URIs, placed under the
# `dataset` key), 224 px tiles at 1.55 mpp.

defaults:
- /dataset/processed_w_masks/ikem@dataset
- _self_

mpp: 1.55 # level 2
tile_extent: 224
stride: 112 # half of tile_extent

# Split fractions; the three values add up to 1.
splits:
train: 0.7
test_preliminary: 0.15
test_final: 0.15
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: knl_patos dataset (config with mask URIs, placed under
# the `dataset` key), 320 px tiles at 0.17 mpp.

defaults:
- /dataset/processed_w_masks/knl_patos@dataset
- _self_

mpp: 0.17 # level 0
tile_extent: 320
stride: 320 # equal to tile_extent

# Split fractions; train is 0.0 here, so the whole share goes to the two
# test splits in equal parts.
splits:
train: 0.0
test_preliminary: 0.5
test_final: 0.5
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: knl_patos dataset (config with mask URIs, placed under
# the `dataset` key), tiles sized for roughly 75 µm at 0.17 mpp.

defaults:
- /dataset/processed_w_masks/knl_patos@dataset
- _self_

mpp: 0.17 # level 0
tile_extent: 430 # NOTE(review): 75 / 0.17 ≈ 441, not 430; 430 px × 0.17 mpp ≈ 73 µm — confirm the intended extent (or the exact mpp)
stride: 430 # equal to tile_extent

# Split fractions; train is 0.0 here, so the whole share goes to the two
# test splits in equal parts.
splits:
train: 0.0
test_preliminary: 0.5
test_final: 0.5
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: knl_patos dataset (config with mask URIs, placed under
# the `dataset` key), 224 px tiles at 0.52 mpp.

defaults:
- /dataset/processed_w_masks/knl_patos@dataset
- _self_

mpp: 0.52 # level 1
tile_extent: 224
stride: 112 # half of tile_extent

# Split fractions; train is 0.0 here, so the whole share goes to the two
# test splits in equal parts.
splits:
train: 0.0
test_preliminary: 0.5
test_final: 0.5
14 changes: 14 additions & 0 deletions configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_
# Tiling experiment: knl_patos dataset (config with mask URIs, placed under
# the `dataset` key), 224 px tiles at 1.55 mpp.

defaults:
- /dataset/processed_w_masks/knl_patos@dataset
- _self_

mpp: 1.55 # level 2
tile_extent: 224
stride: 112 # half of tile_extent

# Split fractions; train is 0.0 here, so the whole share goes to the two
# test splits in equal parts.
splits:
train: 0.0
test_preliminary: 0.5
test_final: 0.5
26 changes: 26 additions & 0 deletions configs/preprocessing/quality_control.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# @package _global_
# Settings for preprocessing/quality_control.py (selected with
# `+preprocessing=quality_control`).

# Directory the QC script creates, fills with masks/metrics and then logs
# as MLflow artifacts.
output_dir: ${project_dir}/quality_control/${dataset.institution}

# Forwarded to the QC client as `timeout` and `max_concurrent`.
# Units of request_timeout are not visible here (presumably seconds — TODO confirm).
request_timeout: 18000
max_concurrent: 5

# Expanded as keyword arguments into SlideCheckConfig by the QC script.
# The check_* flags also decide which mask files the script sorts into
# per-mask sub-directories.
qc_parameters:
mask_level: 3
sample_level: 1
check_residual: True
check_folding: False
check_focus: True
wb_correction: True


# Run metadata; hyperparams mirror qc_parameters through interpolation so the
# values are defined in one place only.
metadata:
run_name: "🎭 QC Masks: ${dataset.institution}"
description: Quality control masks for ${dataset.institution} institution
hyperparams:
mask_level: ${qc_parameters.mask_level}
sample_level: ${qc_parameters.sample_level}
check_residual: ${qc_parameters.check_residual}
check_folding: ${qc_parameters.check_folding}
check_focus: ${qc_parameters.check_focus}
wb_correction: ${qc_parameters.wb_correction}
20 changes: 20 additions & 0 deletions configs/preprocessing/tiling.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# @package _global_
# Base settings for the tiling step. Values set to `???` are mandatory
# (OmegaConf missing-value marker) and are filled in by the experiment
# configs under configs/experiment/preprocessing/tiling/.

mpp: ???
tile_extent: ???
stride: ???
tissue_threshold: 0.5

# Split fractions; every experiment config must provide all three.
splits:
train: ???
test_preliminary: ???
test_final: ???

# Run metadata; hyperparams mirror the top-level values through interpolation.
# NOTE(review): run_name contains only the institution and tile_extent, so
# experiments that differ only in mpp (e.g. *_1_224px vs *_2_224px for the
# same dataset) resolve to the same run name — consider adding ${mpp}.
# NOTE(review): splits are not included in hyperparams — confirm that is intended.
metadata:
run_name: "🧱 Tiling: ${dataset.institution} ${tile_extent}"
description: Tile extraction for ${dataset.institution} institution with tile extent ${tile_extent}
hyperparams:
mpp: ${mpp}
tile_extent: ${tile_extent}
stride: ${stride}
tissue_threshold: ${tissue_threshold}
122 changes: 122 additions & 0 deletions preprocessing/quality_control.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# credits: https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/lymph-nodes/-/blob/develop/preprocessing/qc.py?ref_type=heads

import asyncio
from collections.abc import Generator
from pathlib import Path
from typing import TypedDict

import hydra
import mlflow.artifacts
import pandas as pd
import rationai
from omegaconf import DictConfig
from rationai.mlkit import autolog, with_cli_args
from rationai.mlkit.lightning.loggers import MLFlowLogger
from rationai.types import SlideCheckConfig
from tqdm.asyncio import tqdm


class QCParameters(TypedDict):
    """Typed view of the ``qc_parameters`` mapping consumed by the QC step.

    The whole mapping is expanded into ``SlideCheckConfig(**qc_parameters)``;
    the ``check_*`` flags additionally select which mask files
    ``get_qc_masks`` reports.
    """

    # Levels forwarded to the QC configuration.
    mask_level: int
    sample_level: int

    # Switches for the individual QC checks.
    check_residual: bool
    check_folding: bool
    check_focus: bool

    # Forwarded to the QC configuration unchanged.
    wb_correction: bool


def get_qc_masks(qc_parameters: QCParameters) -> Generator[tuple[str, str], None, None]:
    """Yield ``(mask_prefix, subdirectory)`` pairs for every enabled QC check.

    Pairs are produced in a fixed order: focus masks first, then residual
    artifact masks, then the folding mask. A check whose flag is falsy
    contributes nothing.

    Args:
        qc_parameters: Mapping providing the ``check_focus``,
            ``check_residual`` and ``check_folding`` flags.

    Yields:
        Tuples of the file-name prefix of a generated mask and the name of
        the subdirectory that mask type is sorted into.
    """
    if qc_parameters["check_focus"]:
        yield from (
            ("Piqe_focus_score_piqe_median", "blur_per_tile"),
            ("Piqe_piqe_median_activity_mask", "blur_per_pixel"),
        )

    if qc_parameters["check_residual"]:
        yield from (
            ("ResidualArtifactsAndCoverage_cov_percent_heatmap", "artifacts_per_tile"),
            ("ResidualArtifactsAndCoverage_coverage_mask", "artifacts_per_pixel"),
        )

    if qc_parameters["check_folding"]:
        yield from (("FoldingFunction_folding_test", "folds_per_pixel"),)


def organize_masks(output_path: Path, subdir: str, mask_prefix: str) -> None:
    """Move ``<mask_prefix>_<slide>.tiff`` files into ``output_path/subdir``.

    Each matching file directly inside ``output_path`` is renamed to
    ``output_path/subdir/<slide>.tiff``, i.e. the leading ``<mask_prefix>_``
    is dropped from its name. Files that do not match are left untouched.

    Args:
        output_path: Directory containing the generated mask files.
        subdir: Name of the subdirectory (created if missing) that receives
            the masks.
        mask_prefix: File-name prefix identifying the mask type.
    """
    prefix_dir = output_path / subdir
    prefix_dir.mkdir(parents=True, exist_ok=True)

    name_prefix = f"{mask_prefix}_"

    # Glob has to be wrapped in list, because we're modifying the directory!!!
    for file in list(output_path.glob(f"{name_prefix}*.tiff")):
        # Strip only the leading prefix: str.replace would also remove any
        # later occurrence of the prefix text inside the slide name itself.
        slide_name = file.name.removeprefix(name_prefix)
        file.rename(prefix_dir / slide_name)


async def qc_main(
    output_path: str,
    slides: list[str],
    logger: MLFlowLogger,
    request_timeout: int,
    max_concurrent: int,
    qc_parameters: QCParameters,
) -> None:
    """Run slide QC, tidy up the produced files and log them as artifacts.

    Steps:
        1. Submit ``slides`` to the QC service; every failed slide is
           appended to ``qc_errors.log`` inside ``output_path``.
        2. Sort the generated masks into one subdirectory per mask type.
        3. Merge the CSV files found in ``output_path`` into
           ``qc_metrics.csv`` and delete the individual files.
        4. Log the whole ``output_path`` directory through ``logger``.

    Args:
        output_path: Directory the QC service writes into and that is logged
            afterwards.
        slides: Slide paths handed to the QC service.
        logger: Logger used to upload the output directory.
        request_timeout: Forwarded to the QC client as ``timeout``.
        max_concurrent: Forwarded to the QC client as ``max_concurrent``.
        qc_parameters: Expanded into ``SlideCheckConfig`` and used to pick
            the mask types to organize.
    """
    output_dir = Path(output_path)
    error_log = output_dir / "qc_errors.log"
    merged_csv = output_dir / "qc_metrics.csv"

    async with rationai.AsyncClient() as client:  # type: ignore[attr-defined]
        async for result in tqdm(
            client.qc.check_slides(
                slides,
                output_path,
                config=SlideCheckConfig(**qc_parameters),
                timeout=request_timeout,
                max_concurrent=max_concurrent,
            ),
            total=len(slides),
        ):
            if not result.success:
                # Explicit encoding so slide paths / error texts with
                # non-ASCII characters are written the same on any platform.
                with open(error_log, "a", encoding="utf-8") as log_file:
                    log_file.write(
                        f"Failed to process {result.wsi_path}: {result.error}\n"
                    )

    # Organize generated masks into subdirectories
    for prefix, artifact_name in get_qc_masks(qc_parameters):
        organize_masks(output_dir, artifact_name, prefix)

    # Merge generated csv files. Skipped when there are none (e.g. every
    # slide failed), because pd.concat raises ValueError on an empty list.
    csvs = list(output_dir.glob("*.csv"))
    if csvs:
        pd.concat([pd.read_csv(f) for f in csvs]).to_csv(merged_csv, index=False)

        # Remove individual csv files. The merged file itself is part of
        # ``csvs`` when it is left over from a previous run in the same
        # directory; it must survive, otherwise the fresh merge is deleted.
        for f in csvs:
            if f != merged_csv:
                f.unlink()

    logger.log_artifacts(local_dir=output_path)


def download_dataset(uri: str) -> pd.DataFrame:
    """Download the artifact at ``uri`` via MLflow and load it as a CSV table.

    Args:
        uri: MLflow artifact URI of a CSV file.

    Returns:
        The content of the downloaded CSV file as a DataFrame.
    """
    local_csv = mlflow.artifacts.download_artifacts(artifact_uri=uri)
    return pd.read_csv(local_csv)


@with_cli_args(["+preprocessing=quality_control"])
@hydra.main(config_path="../configs", config_name="preprocessing", version_base=None)
@autolog
def main(config: DictConfig, logger: MLFlowLogger) -> None:
    """Entry point: run quality control for the slides of the configured dataset.

    Downloads the dataset table referenced by ``config.dataset.uri``, makes
    sure ``config.output_dir`` exists and runs the asynchronous QC pipeline
    on the values of the table's ``path`` column.

    Args:
        config: Composed Hydra configuration.
        logger: MLflow logger passed on to the QC pipeline.
    """
    slides_table = download_dataset(config.dataset.uri)

    qc_dir = Path(config.output_dir)
    qc_dir.mkdir(parents=True, exist_ok=True)

    # The pipeline takes the output directory as an absolute POSIX string.
    qc_job = qc_main(
        output_path=qc_dir.absolute().as_posix(),
        slides=slides_table["path"].to_list(),
        logger=logger,
        request_timeout=config.request_timeout,
        max_concurrent=config.max_concurrent,
        qc_parameters=config.qc_parameters,
    )
    asyncio.run(qc_job)


# Run the QC pipeline only when this file is executed as a script.
if __name__ == "__main__":
    main()
Loading