From 700f91609c177bfe9b5e9f5c58819aee7a7acdc2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 25 Jan 2026 22:39:35 +0000 Subject: [PATCH 01/42] chore: dependencies --- preprocessing/.gitkeep | 0 pyproject.toml | 2 ++ uv.lock | 67 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) delete mode 100644 preprocessing/.gitkeep diff --git a/preprocessing/.gitkeep b/preprocessing/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/pyproject.toml b/pyproject.toml index 39445e2..7265715 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ dependencies = [ "openpyxl>=3.1.5", "pandas>=2.3.3", "pyvips<3.1", + "rationai-sdk", "rationai-mlkit", "ray>=2.52.1", "torch>=2.9.0", @@ -26,3 +27,4 @@ run = ["rationai-kube-jobs"] [tool.uv.sources] rationai-mlkit = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" } rationai-kube-jobs = { git = "ssh://git@gitlab.ics.muni.cz/rationai/infrastructure/kube-jobs" } +rationai-sdk = { git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" } diff --git a/uv.lock b/uv.lock index baa2405..29c0ba2 100644 --- a/uv.lock +++ b/uv.lock @@ -589,6 +589,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + [[package]] name = "hydra-core" version = "1.3.2" @@ -873,6 +901,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/73/3d757cb3fc16f0f9794dd289bcd0c4a031d9cf54d8137d6b984b2d02edf3/lightning_utilities-0.15.2-py3-none-any.whl", hash = "sha256:ad3ab1703775044bbf880dbf7ddaaac899396c96315f3aa1779cec9d618a9841", size = 29431, upload-time = "2025-08-06T13:57:38.046Z" }, ] +[[package]] +name = "lz4" +version = "4.4.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/51/f1b86d93029f418033dddf9b9f79c8d2641e7454080478ee2aab5123173e/lz4-4.4.5.tar.gz", hash = "sha256:5f0b9e53c1e82e88c10d7c180069363980136b9d7a8306c4dca4f760d60c39f0", size = 172886, upload-time = "2025-11-03T13:02:36.061Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1b/ac/016e4f6de37d806f7cc8f13add0a46c9a7cfc41a5ddc2bc831d7954cf1ce/lz4-4.4.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:df5aa4cead2044bab83e0ebae56e0944cc7fcc1505c7787e9e1057d6d549897e", size = 207163, upload-time = "2025-11-03T13:01:45.895Z" }, + { url = "https://files.pythonhosted.org/packages/8d/df/0fadac6e5bd31b6f34a1a8dbd4db6a7606e70715387c27368586455b7fc9/lz4-4.4.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6d0bf51e7745484d2092b3a51ae6eb58c3bd3ce0300cf2b2c14f76c536d5697a", size = 207150, upload-time = "2025-11-03T13:01:47.205Z" }, + { url = "https://files.pythonhosted.org/packages/b7/17/34e36cc49bb16ca73fb57fbd4c5eaa61760c6b64bce91fcb4e0f4a97f852/lz4-4.4.5-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7b62f94b523c251cf32aa4ab555f14d39bd1a9df385b72443fd76d7c7fb051f5", size = 1292045, upload-time = "2025-11-03T13:01:48.667Z" }, + { url = "https://files.pythonhosted.org/packages/90/1c/b1d8e3741e9fc89ed3b5f7ef5f22586c07ed6bb04e8343c2e98f0fa7ff04/lz4-4.4.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2c3ea562c3af274264444819ae9b14dbbf1ab070aff214a05e97db6896c7597e", size = 1279546, upload-time = "2025-11-03T13:01:50.159Z" }, + { url = "https://files.pythonhosted.org/packages/55/d9/e3867222474f6c1b76e89f3bd914595af69f55bf2c1866e984c548afdc15/lz4-4.4.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:24092635f47538b392c4eaeff14c7270d2c8e806bf4be2a6446a378591c5e69e", size = 1368249, upload-time = "2025-11-03T13:01:51.273Z" }, + { url = "https://files.pythonhosted.org/packages/b2/e7/d667d337367686311c38b580d1ca3d5a23a6617e129f26becd4f5dc458df/lz4-4.4.5-cp312-cp312-win32.whl", hash = "sha256:214e37cfe270948ea7eb777229e211c601a3e0875541c1035ab408fbceaddf50", size = 88189, upload-time = "2025-11-03T13:01:52.605Z" }, + { url = "https://files.pythonhosted.org/packages/a5/0b/a54cd7406995ab097fceb907c7eb13a6ddd49e0b231e448f1a81a50af65c/lz4-4.4.5-cp312-cp312-win_amd64.whl", hash = "sha256:713a777de88a73425cf08eb11f742cd2c98628e79a8673d6a52e3c5f0c116f33", size = 99497, upload-time = "2025-11-03T13:01:53.477Z" }, + { url = "https://files.pythonhosted.org/packages/6a/7e/dc28a952e4bfa32ca16fa2eb026e7a6ce5d1411fcd5986cd08c74ec187b9/lz4-4.4.5-cp312-cp312-win_arm64.whl", hash = "sha256:a88cbb729cc333334ccfb52f070463c21560fca63afcf636a9f160a55fac3301", size = 91279, upload-time = "2025-11-03T13:01:54.419Z" }, +] + [[package]] name = "mako" version = "1.3.10" @@ -1765,6 +1809,18 @@ dependencies = [ { name = "torch" }, ] +[[package]] +name = "rationai-sdk" +version = "0.1.0" +source = { git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git#a4d25084850cd26678783485dda87bbeed949492" } +dependencies = [ + { name = "httpx" }, + { name = "lz4" }, + { name = "numpy" }, + { name = "pillow" }, + { name = "tenacity" }, +] + [[package]] name = "ray" version = "2.53.0" @@ -2025,6 +2081,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353, upload-time = "2025-04-27T18:04:59.103Z" }, ] +[[package]] +name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, +] + [[package]] name = "threadpoolctl" version = "3.6.0" @@ -2177,6 +2242,7 @@ dependencies = [ { name = "pandas" }, { name = "pyvips" }, { name = "rationai-mlkit" }, + { name = "rationai-sdk" }, { name = "ray" }, { name = "torch" }, { name = "torchmetrics" }, @@ -2202,6 +2268,7 @@ requires-dist = [ { name = "pandas", specifier = ">=2.3.3" }, { name = "pyvips", specifier = "<3.1" }, { name = "rationai-mlkit", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }, + { name = "rationai-sdk", git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" }, { name = "ray", specifier = ">=2.52.1" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, From 65e86dbe3ee0471fdd1a2f917c8e7f66687a4b13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 25 Jan 2026 22:39:48 +0000 Subject: [PATCH 02/42] feat: quality control --- configs/preprocessing/qualitty_control.yaml | 26 +++++ preprocessing/quality_control.py | 118 ++++++++++++++++++++ scripts/preprocessing/quality_control.py | 19 ++++ 3 files changed, 163 insertions(+) create mode 100644 configs/preprocessing/qualitty_control.yaml create mode 100644 preprocessing/quality_control.py create mode 100644 scripts/preprocessing/quality_control.py diff --git a/configs/preprocessing/qualitty_control.yaml b/configs/preprocessing/qualitty_control.yaml new file mode 100644 index 0000000..8f3d84c --- /dev/null +++ b/configs/preprocessing/qualitty_control.yaml @@ -0,0 +1,26 @@ +# @package _global_ + +output_dir: ${project_dir}/quality_control + +request_timeout: 18000 +max_concurrent: 5 + +qc_parameters: + mask_level: 3 + sample_level: 1 + check_residual: True + check_folding: False + check_focus: True + wb_correction: True + + +metadata: + run_name: "🎭 QC Masks: ${dataset.institution}" + description: Quality control masks for ${dataset.institution} institution + hyperparams: + mask_level: ${qc_parameters.mask_level} + sample_level: ${qc_parameters.sample_level} + check_residual: ${qc_parameters.check_residual} + check_folding: ${qc_parameters.check_folding} + check_focus: ${qc_parameters.check_focus} + wb_correction: ${qc_parameters.wb_correction} \ No newline at end of file diff --git a/preprocessing/quality_control.py b/preprocessing/quality_control.py new file mode 100644 index 0000000..6ac80fb --- /dev/null +++ b/preprocessing/quality_control.py @@ -0,0 +1,118 @@ +# credits: https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/lymph-nodes/-/blob/develop/preprocessing/qc.py?ref_type=heads + +import asyncio +from collections.abc import Generator +from pathlib import Path +from typing import TypedDict + +import hydra +import mlflow.artifacts +import pandas as pd +import rationai +from omegaconf import DictConfig +from rationai.mlkit import autolog, with_cli_args +from rationai.mlkit.lightning.loggers import MLFlowLogger +from rationai.types import SlideCheckConfig +from tqdm.asyncio import tqdm + + +class QCParameters(TypedDict): + mask_level: int + sample_level: int + check_residual: bool + check_folding: bool + check_focus: bool + wb_correction: bool + + +def get_qc_masks(qc_parameters: QCParameters) -> Generator[tuple[str, str], None, None]: + if qc_parameters["check_residual"]: + yield ("Piqe_focus_score_piqe_median", "blur_per_tile") + yield ("Piqe_piqe_median_activity_mask", "blur_per_pixel") + + if qc_parameters["check_focus"]: + yield ("ResidualArtifactsAndCoverage_cov_percent_heatmap", "artifacts_per_tile") + yield ("ResidualArtifactsAndCoverage_coverage_mask", "artifacts_per_pixel") + + if qc_parameters["check_folding"]: + yield ("FoldingFunction_folding_test", "folds_per_pixel") + + +def organize_masks(output_path: Path, subdir: str, mask_prefix: str) -> None: + prefix_dir = output_path / subdir + prefix_dir.mkdir(parents=True, exist_ok=True) + + for file in output_path.glob(f"{mask_prefix}_*.tiff"): + slide_name = file.name.replace(f"{mask_prefix}_", "") + destination = prefix_dir / slide_name + file.rename(destination) + + +async def qc_main( + output_path: str, + slides: list[str], + logger: MLFlowLogger, + request_timeout: int, + max_concurrent: int, + qc_parameters: QCParameters, +) -> None: + async with rationai.AsyncClient() as client: # type: ignore[attr-defined] + async for result in tqdm( + client.qc.check_slides( + slides, + output_path, + config=SlideCheckConfig(**qc_parameters), + timeout=request_timeout, + max_concurrent=max_concurrent, + ), + total=len(slides), + ): + if not result.success: + with open(Path(output_path) / "qc_errors.log", "a") as log_file: + log_file.write( + f"Failed to process {result.wsi_path}: {result.error}\n" + ) + + # Organize generated masks into subdirectories + for prefix, artifact_name in get_qc_masks(qc_parameters): + organize_masks(Path(output_path), artifact_name, prefix) + + # Merge generated csv files + csvs = list(Path(output_path).glob("*.csv")) + pd.concat([pd.read_csv(f) for f in csvs]).to_csv( + Path(output_path, "qc_metrics.csv"), index=False + ) + + # Remove individual csv files + for f in csvs: + f.unlink() + + logger.log_artifacts(local_dir=output_path) + + +def download_dataframe(uri: str) -> pd.DataFrame: + path = mlflow.artifacts.download_artifacts(artifact_uri=uri) + df = pd.read_csv(path) + return df + + +@with_cli_args(["+preprocessing=quality_control"]) +@hydra.main(config_path="../configs", config_name="preprocessing", version_base=None) +@autolog +def main(config: DictConfig, logger: MLFlowLogger) -> None: + df = download_dataframe(config.dataset.uri) + + asyncio.run( + qc_main( + output_path=Path(config.output_dir).absolute().as_posix(), + slides=df["path"].to_list(), + logger=logger, + request_timeout=config.request_timeout, + max_concurrent=config.max_concurrent, + qc_parameters=config.qc_parameters, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/preprocessing/quality_control.py b/scripts/preprocessing/quality_control.py new file mode 100644 index 0000000..cdb974e --- /dev/null +++ b/scripts/preprocessing/quality_control.py @@ -0,0 +1,19 @@ +from kube_jobs import storage, submit_job + + +COHORT = "ikem" # "ikem", "ftn", or "knl_patos" + +submit_job( + job_name=f"ulcerative-colitis-quality-control-{COHORT.replace('_', '-')}", + username=..., + public=False, + cpu=2, + memory="4Gi", + script=[ + "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", + "cd workdir", + "uv sync --frozen", + f"uv run -m preprocessing.quality_control +data=processed/{COHORT}", + ], + storage=[storage.secure.DATA], +) From 59965a4e34e98aef1e5529136999585709fecd5d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 25 Jan 2026 22:43:41 +0000 Subject: [PATCH 03/42] feat: add dataset configuration files for ftn, ikem, and knl_patos --- configs/data/processed/ftn.yaml | 5 +++++ configs/data/processed/ikem.yaml | 5 +++++ configs/data/processed/knl_patos.yaml | 5 +++++ 3 files changed, 15 insertions(+) create mode 100644 configs/data/processed/ftn.yaml create mode 100644 configs/data/processed/ikem.yaml create mode 100644 configs/data/processed/knl_patos.yaml diff --git a/configs/data/processed/ftn.yaml b/configs/data/processed/ftn.yaml new file mode 100644 index 0000000..7c2d21a --- /dev/null +++ b/configs/data/processed/ftn.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +dataset: + institution: ftn + uri: mlflow-artifacts:/86/142642fc780f4a96800c691168b5c2c3/artifacts/dataset.csv # TODO update URI \ No newline at end of file diff --git a/configs/data/processed/ikem.yaml b/configs/data/processed/ikem.yaml new file mode 100644 index 0000000..9e84997 --- /dev/null +++ b/configs/data/processed/ikem.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +dataset: + institution: ikem + uri: mlflow-artifacts:/86/52d0081d60ba4585a16a3bd341d5ab09/artifacts/dataset.csv # TODO update URI \ No newline at end of file diff --git a/configs/data/processed/knl_patos.yaml b/configs/data/processed/knl_patos.yaml new file mode 100644 index 0000000..afbd976 --- /dev/null +++ b/configs/data/processed/knl_patos.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +dataset: + institution: knl_patos + uri: mlflow-artifacts:/86/afbcfd43cb3c4fd0b1e9b5dbe7327d91/artifacts/dataset.csv # TODO update URI \ No newline at end of file From 234d61187384e2e6372f46a1b5831487585949ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 25 Jan 2026 22:47:44 +0000 Subject: [PATCH 04/42] fix: output dir --- configs/preprocessing/qualitty_control.yaml | 2 +- preprocessing/quality_control.py | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/configs/preprocessing/qualitty_control.yaml b/configs/preprocessing/qualitty_control.yaml index 8f3d84c..80a6093 100644 --- a/configs/preprocessing/qualitty_control.yaml +++ b/configs/preprocessing/qualitty_control.yaml @@ -1,6 +1,6 @@ # @package _global_ -output_dir: ${project_dir}/quality_control +output_dir: ${project_dir}/quality_control/${dataset.institution} request_timeout: 18000 max_concurrent: 5 diff --git a/preprocessing/quality_control.py b/preprocessing/quality_control.py index 6ac80fb..bce3912 100644 --- a/preprocessing/quality_control.py +++ b/preprocessing/quality_control.py @@ -102,9 +102,12 @@ def download_dataframe(uri: str) -> pd.DataFrame: def main(config: DictConfig, logger: MLFlowLogger) -> None: df = download_dataframe(config.dataset.uri) + output_path = Path(config.output_dir) + output_path.mkdir(parents=True, exist_ok=True) + asyncio.run( qc_main( - output_path=Path(config.output_dir).absolute().as_posix(), + output_path=output_path.absolute().as_posix(), slides=df["path"].to_list(), logger=logger, request_timeout=config.request_timeout, From 49304343663b02d2ff30eaaaa1ab3639bfe202a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 25 Jan 2026 22:59:52 +0000 Subject: [PATCH 05/42] fix: typo --- .../preprocessing/{qualitty_control.yaml => quality_control.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename configs/preprocessing/{qualitty_control.yaml => quality_control.yaml} (100%) diff --git a/configs/preprocessing/qualitty_control.yaml b/configs/preprocessing/quality_control.yaml similarity index 100% rename from configs/preprocessing/qualitty_control.yaml rename to configs/preprocessing/quality_control.yaml From 456d4ae5f7ba7b2fde59c5219bed95ebbbc493c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 17:24:31 +0000 Subject: [PATCH 06/42] chore: add tiling libs --- pyproject.toml | 3 + uv.lock | 426 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 429 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 8e92ff8..3af5043 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,9 +16,11 @@ dependencies = [ "rationai-sdk", "rationai-mlkit", "rationai-masks", + "rationai-tiling", "ray>=2.52.1", "torch>=2.9.0", "torchmetrics>=1.8.2", + "ratiopath>=1.0.3", ] [dependency-groups] @@ -28,5 +30,6 @@ run = ["rationai-kube-jobs"] [tool.uv.sources] rationai-mlkit = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" } rationai-masks = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/masks.git" } +rationai-tiling = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" } rationai-kube-jobs = { git = "ssh://git@gitlab.ics.muni.cz/rationai/infrastructure/kube-jobs" } rationai-sdk = { git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" } diff --git a/uv.lock b/uv.lock index a614220..b05afd4 100644 --- a/uv.lock +++ b/uv.lock @@ -62,6 +62,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] +[[package]] +name = "albucore" +version = "0.0.24" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "opencv-python-headless" }, + { name = "simsimd" }, + { name = "stringzilla" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/13/69/d4cbcf2a5768bf91cd14ffef783520458431e5d2b22fbc08418d3ba09a88/albucore-0.0.24.tar.gz", hash = "sha256:f2cab5431fadf94abf87fd0c89d9f59046e49fe5de34afea8f89bc8390253746", size = 16981, upload-time = "2025-03-09T18:46:51.409Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0a/e2/91f145e1f32428e9e1f21f46a7022ffe63d11f549ee55c3b9265ff5207fc/albucore-0.0.24-py3-none-any.whl", hash = "sha256:adef6e434e50e22c2ee127b7a3e71f2e35fa088bcf54431e18970b62d97d0005", size = 15372, upload-time = "2025-03-09T18:46:50.177Z" }, +] + +[[package]] +name = "albumentations" +version = "2.0.8" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "albucore" }, + { name = "numpy" }, + { name = "opencv-python-headless" }, + { name = "pydantic" }, + { name = "pyyaml" }, + { name = "scipy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f4/f4/85eb56c3217b53bcfc2d12e840a0b18ca60902086321cafa5a730f9c0470/albumentations-2.0.8.tar.gz", hash = "sha256:4da95e658e490de3c34af8fcdffed09e36aa8a4edd06ca9f9e7e3ea0b0b16856", size = 354460, upload-time = "2025-05-27T21:23:17.415Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/64/013409c451a44b61310fb757af4527f3de57fc98a00f40448de28b864290/albumentations-2.0.8-py3-none-any.whl", hash = "sha256:c4c4259aaf04a7386ad85c7fdcb73c6c7146ca3057446b745cc035805acb1017", size = 369423, upload-time = "2025-05-27T21:23:15.609Z" }, +] + [[package]] name = "alembic" version = "1.18.1" @@ -354,6 +386,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, ] +[[package]] +name = "donfig" +version = "0.8.1.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/25/71/80cc718ff6d7abfbabacb1f57aaa42e9c1552bfdd01e64ddd704e4a03638/donfig-0.8.1.post1.tar.gz", hash = "sha256:3bef3413a4c1c601b585e8d297256d0c1470ea012afa6e8461dc28bfb7c23f52", size = 19506, upload-time = "2024-05-23T14:14:31.513Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/d5/c5db1ea3394c6e1732fb3286b3bd878b59507a8f77d32a2cebda7d7b7cd4/donfig-0.8.1.post1-py3-none-any.whl", hash = "sha256:2a3175ce74a06109ff9307d90a230f81215cbac9a751f4d1c6194644b8204f9d", size = 21592, upload-time = "2024-05-23T14:13:55.283Z" }, +] + [[package]] name = "durationpy" version = "0.10" @@ -478,6 +522,23 @@ http = [ { name = "aiohttp" }, ] +[[package]] +name = "geopandas" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "packaging" }, + { name = "pandas" }, + { name = "pyogrio" }, + { name = "pyproj" }, + { name = "shapely" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8d/24/5eb5685d7bf89d64218919379f882d19a60f8219d66d833c83b1cf264c95/geopandas-1.1.2.tar.gz", hash = "sha256:33f7b33565c46a45b8459a2ab699ec943fdbb5716e58e251b3c413cf7783106c", size = 336037, upload-time = "2025-12-22T21:06:13.749Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/e4/fac19dc34cb686c96011388b813ff7b858a70681e5ce6ce7698e5021b0f4/geopandas-1.1.2-py3-none-any.whl", hash = "sha256:2bb0b1052cb47378addb4ba54c47f8d4642dcbda9b61375638274f49d9f0bb0d", size = 341734, upload-time = "2025-12-22T21:06:12.498Z" }, +] + [[package]] name = "gitdb" version = "4.0.12" @@ -515,6 +576,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/db/18/79e9008530b79527e0d5f79e7eef08d3b179b7f851cfd3a2f27822fbdfa9/google_auth-2.47.0-py3-none-any.whl", hash = "sha256:c516d68336bfde7cf0da26aab674a36fedcf04b37ac4edd59c597178760c3498", size = 234867, upload-time = "2026-01-06T21:55:28.6Z" }, ] +[[package]] +name = "google-crc32c" +version = "1.8.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/03/41/4b9c02f99e4c5fb477122cd5437403b552873f014616ac1d19ac8221a58d/google_crc32c-1.8.0.tar.gz", hash = "sha256:a428e25fb7691024de47fecfbff7ff957214da51eddded0da0ae0e0f03a2cf79", size = 14192, upload-time = "2025-12-16T00:35:25.142Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/5f/7307325b1198b59324c0fa9807cafb551afb65e831699f2ce211ad5c8240/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:4b8286b659c1335172e39563ab0a768b8015e88e08329fa5321f774275fc3113", size = 31300, upload-time = "2025-12-16T00:21:56.723Z" }, + { url = "https://files.pythonhosted.org/packages/21/8e/58c0d5d86e2220e6a37befe7e6a94dd2f6006044b1a33edf1ff6d9f7e319/google_crc32c-1.8.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:2a3dc3318507de089c5384cc74d54318401410f82aa65b2d9cdde9d297aca7cb", size = 30867, upload-time = "2025-12-16T00:38:31.302Z" }, + { url = "https://files.pythonhosted.org/packages/ce/a9/a780cc66f86335a6019f557a8aaca8fbb970728f0efd2430d15ff1beae0e/google_crc32c-1.8.0-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:14f87e04d613dfa218d6135e81b78272c3b904e2a7053b841481b38a7d901411", size = 33364, upload-time = "2025-12-16T00:40:22.96Z" }, + { url = "https://files.pythonhosted.org/packages/21/3f/3457ea803db0198c9aaca2dd373750972ce28a26f00544b6b85088811939/google_crc32c-1.8.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cb5c869c2923d56cb0c8e6bcdd73c009c36ae39b652dbe46a05eb4ef0ad01454", size = 33740, upload-time = "2025-12-16T00:40:23.96Z" }, + { url = "https://files.pythonhosted.org/packages/df/c0/87c2073e0c72515bb8733d4eef7b21548e8d189f094b5dad20b0ecaf64f6/google_crc32c-1.8.0-cp312-cp312-win_amd64.whl", hash = "sha256:3cc0c8912038065eafa603b238abf252e204accab2a704c63b9e14837a854962", size = 34437, upload-time = "2025-12-16T00:35:21.395Z" }, +] + [[package]] name = "graphene" version = "3.4.3" @@ -640,6 +714,37 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "imagecodecs" +version = "2026.1.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/75/cedaa3dba300df85712515b9c9e13d848ea9557b796b6c44b50bd361571e/imagecodecs-2026.1.14.tar.gz", hash = "sha256:e37ef5116d41ba90b1c9d1d7121846671fd65c271f0c15ef24208353fa79b283", size = 9527808, upload-time = "2026-01-14T04:24:31.234Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/b2/c36b8633c3303ed8d80c8d8490c8488baa7f9e6f46fd11688cb9e68eae5e/imagecodecs-2026.1.14-cp311-abi3-macosx_10_14_x86_64.whl", hash = "sha256:b94c57922816eb025d443f4594f1235d80f0f56b4b48aa9b60bf9d679ea49415", size = 12861649, upload-time = "2026-01-14T04:23:45.235Z" }, + { url = "https://files.pythonhosted.org/packages/26/79/71780373cef2ec5d9d2f111010ff0a16de0788fdc9a8f26f3cce05d0ed38/imagecodecs-2026.1.14-cp311-abi3-macosx_11_0_arm64.whl", hash = "sha256:bd72bdff628d6c32c71f086e488f483abbf84816b05d439964d980af49f2a9c5", size = 10700009, upload-time = "2026-01-14T04:23:48.108Z" }, + { url = "https://files.pythonhosted.org/packages/84/b4/48fc1a9b2379941a752d046b6d9217a1e82c09ed11184a18245cbb0d9c8b/imagecodecs-2026.1.14-cp311-abi3-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e92f8b3bddf632c23d3a832f35e5a2c2326eb0e2ae1ebce419789cce63e5c30", size = 23748325, upload-time = "2026-01-14T04:23:51.644Z" }, + { url = "https://files.pythonhosted.org/packages/02/e1/8a299b91a4abee7c299c0c6625f9c0985c623fd4b6b41b5a5fe92508bb18/imagecodecs-2026.1.14-cp311-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8a78451926905459f42e827c207d80e1601abf68eaf0e38fd65ddd3c346d1f47", size = 24716561, upload-time = "2026-01-14T04:23:55.275Z" }, + { url = "https://files.pythonhosted.org/packages/81/af/f03fa2a60617a46814fea5523f8d53a703aabbb316e029b21efe8fe04f9f/imagecodecs-2026.1.14-cp311-abi3-win32.whl", hash = "sha256:6bbf7defac9f71e0401305440f7e94160201789e03ee75e6e5709bc904742429", size = 17224233, upload-time = "2026-01-14T04:23:58.261Z" }, + { url = "https://files.pythonhosted.org/packages/b6/22/ae01473653dbad9e10a1632b5d8ae6473de0f3ec2b82c912836661d501a9/imagecodecs-2026.1.14-cp311-abi3-win_amd64.whl", hash = "sha256:13ec4659d05010aa072644f100d0e1e1fcc61d7eaa960923b8216272682e6c9a", size = 21544843, upload-time = "2026-01-14T04:24:02.387Z" }, + { url = "https://files.pythonhosted.org/packages/54/6e/86fa1a07aee2ea39acfa04e372a144a72b4cdc80f40a11a3ee312c12d312/imagecodecs-2026.1.14-cp311-abi3-win_arm64.whl", hash = "sha256:2a6102f3b99c66e090a619f8a0204e6e95c01399854ffa09e4a9de476dc1671a", size = 16893409, upload-time = "2026-01-14T04:24:05.764Z" }, +] + +[[package]] +name = "imageio" +version = "2.37.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "pillow" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/6f/606be632e37bf8d05b253e8626c2291d74c691ddc7bcdf7d6aaf33b32f6a/imageio-2.37.2.tar.gz", hash = "sha256:0212ef2727ac9caa5ca4b2c75ae89454312f440a756fcfc8ef1993e718f50f8a", size = 389600, upload-time = "2025-11-04T14:29:39.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/fe/301e0936b79bcab4cacc7548bf2853fc28dced0a578bab1f7ef53c9aa75b/imageio-2.37.2-py3-none-any.whl", hash = "sha256:ad9adfb20335d718c03de457358ed69f141021a333c40a53e57273d8a5bd0b9b", size = 317646, upload-time = "2025-11-04T14:29:37.948Z" }, +] + [[package]] name = "importlib-metadata" version = "8.7.1" @@ -848,6 +953,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/70/05b685ea2dffcb2adbf3cdcea5d8865b7bc66f67249084cf845012a0ff13/kubernetes-35.0.0-py2.py3-none-any.whl", hash = "sha256:39e2b33b46e5834ef6c3985ebfe2047ab39135d41de51ce7641a7ca5b372a13d", size = 2017602, upload-time = "2026-01-16T01:05:25.991Z" }, ] +[[package]] +name = "lazy-loader" +version = "0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6f/6b/c875b30a1ba490860c93da4cabf479e03f584eba06fe5963f6f6644653d8/lazy_loader-0.4.tar.gz", hash = "sha256:47c75182589b91a4e1a85a136c074285a5ad4d9f39c63e0d7fb76391c4574cd1", size = 15431, upload-time = "2024-04-05T13:03:12.261Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/83/60/d497a310bde3f01cb805196ac61b7ad6dc5dcf8dce66634dc34364b20b4f/lazy_loader-0.4-py3-none-any.whl", hash = "sha256:342aa8e14d543a154047afb4ba8ef17f5563baad3fc610d7b15b213b0f119efc", size = 12097, upload-time = "2024-04-05T13:03:10.514Z" }, +] + [[package]] name = "librt" version = "0.7.8" @@ -1151,6 +1268,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/c9/b2622292ea83fbb4ec318f5b9ab867d0a28ab43c5717bb85b0a5f6b3b0a4/networkx-3.6.1-py3-none-any.whl", hash = "sha256:d47fbf302e7d9cbbb9e2555a0d267983d2aa476bac30e90dfbe5669bd57f3762", size = 2068504, upload-time = "2025-12-08T17:02:38.159Z" }, ] +[[package]] +name = "numcodecs" +version = "0.16.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/bd/8a391e7c356366224734efd24da929cc4796fff468bfb179fe1af6548535/numcodecs-0.16.5.tar.gz", hash = "sha256:0d0fb60852f84c0bd9543cc4d2ab9eefd37fc8efcc410acd4777e62a1d300318", size = 6276387, upload-time = "2025-11-21T02:49:48.986Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/cc/55420f3641a67f78392dc0bc5d02cb9eb0a9dcebf2848d1ac77253ca61fa/numcodecs-0.16.5-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:24e675dc8d1550cd976a99479b87d872cb142632c75cc402fea04c08c4898523", size = 1656287, upload-time = "2025-11-21T02:49:25.755Z" }, + { url = "https://files.pythonhosted.org/packages/f5/6c/86644987505dcb90ba6d627d6989c27bafb0699f9fd00187e06d05ea8594/numcodecs-0.16.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:94ddfa4341d1a3ab99989d13b01b5134abb687d3dab2ead54b450aefe4ad5bd6", size = 1148899, upload-time = "2025-11-21T02:49:26.87Z" }, + { url = "https://files.pythonhosted.org/packages/97/1e/98aaddf272552d9fef1f0296a9939d1487914a239e98678f6b20f8b0a5c8/numcodecs-0.16.5-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b554ab9ecf69de7ca2b6b5e8bc696bd9747559cb4dd5127bd08d7a28bec59c3a", size = 8534814, upload-time = "2025-11-21T02:49:28.547Z" }, + { url = "https://files.pythonhosted.org/packages/fb/53/78c98ef5c8b2b784453487f3e4d6c017b20747c58b470393e230c78d18e8/numcodecs-0.16.5-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad1a379a45bd3491deab8ae6548313946744f868c21d5340116977ea3be5b1d6", size = 9173471, upload-time = "2025-11-21T02:49:30.444Z" }, + { url = "https://files.pythonhosted.org/packages/1c/20/2fdec87fc7f8cec950d2b0bea603c12dc9f05b4966dc5924ba5a36a61bf6/numcodecs-0.16.5-cp312-cp312-win_amd64.whl", hash = "sha256:845a9857886ffe4a3172ba1c537ae5bcc01e65068c31cf1fce1a844bd1da050f", size = 801412, upload-time = "2025-11-21T02:49:32.123Z" }, +] + [[package]] name = "numpy" version = "2.4.1" @@ -1313,6 +1447,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] +[[package]] +name = "ome-types" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "pydantic-extra-types" }, + { name = "xsdata" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/48/4c/d252c1619c733eec9b4d2d21fe369fd21a2594954b396bf4352edea1e272/ome_types-0.6.3.tar.gz", hash = "sha256:eef4138cda5edfdcb2a44cfb90b714a59ead1b69e4c5ce5f9892ad397ccaaa68", size = 121784, upload-time = "2025-11-26T00:28:24.34Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/6a/1000cad1700ab0af4d1b1d0a9c23c34badddb4f547c008bde2a6c61968f1/ome_types-0.6.3-py3-none-any.whl", hash = "sha256:ce9753ff351bbc534ee5c5038d3cf60b1e4c13d69ad2e6b5a5b75de2a52521a5", size = 245802, upload-time = "2025-11-26T00:28:22.853Z" }, +] + [[package]] name = "omegaconf" version = "2.3.0" @@ -1326,6 +1474,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500, upload-time = "2022-12-08T20:59:19.686Z" }, ] +[[package]] +name = "opencv-python-headless" +version = "4.13.0.90" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/ed/76/38c4cbb5ccfce7aaf36fd9be9fc74a15c85a48ef90bfaca2049b486e10c5/opencv_python_headless-4.13.0.90-cp37-abi3-macosx_13_0_arm64.whl", hash = "sha256:12a28674f215542c9bf93338de1b5bffd76996d32da9acb9e739fdb9c8bbd738", size = 46020414, upload-time = "2026-01-18T09:07:10.801Z" }, + { url = "https://files.pythonhosted.org/packages/93/c5/4b40daa5003b45aa8397f160324a091ed323733e2446dc0bdf3655e77b84/opencv_python_headless-4.13.0.90-cp37-abi3-macosx_14_0_x86_64.whl", hash = "sha256:32255203040dc98803be96362e13f9e4bce20146898222d2e5c242f80de50da5", size = 32568519, upload-time = "2026-01-18T09:07:52.368Z" }, + { url = "https://files.pythonhosted.org/packages/da/65/920e64a7f03cf5917cd2c6a3046293843c1a16ad89f0ed0f1c683979c9de/opencv_python_headless-4.13.0.90-cp37-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:e13790342591557050157713af17a7435ac1b50c65282715093c9297fa045d8f", size = 35191272, upload-time = "2026-01-18T09:08:49.235Z" }, + { url = "https://files.pythonhosted.org/packages/fc/13/af150685be342dc09bfb0824e2a280020ccf1c7fc64e15a31d9209016aa9/opencv_python_headless-4.13.0.90-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dbc1f4625e5af3a80ebdbd84380227c0f445228588f2521b11af47710caca1ba", size = 57683677, upload-time = "2026-01-18T09:10:23.588Z" }, + { url = "https://files.pythonhosted.org/packages/cd/47/baab2a3b6d8da8c52e73d00207d1ed3155601c2c332ea855455b3fbc8ff4/opencv_python_headless-4.13.0.90-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eba38bc255d0b7d1969c5bcc90a060ca2b61a3403b613872c750bfa5dfe9e03b", size = 36590019, upload-time = "2026-01-18T09:10:49.053Z" }, + { url = "https://files.pythonhosted.org/packages/81/a1/facfe2801a861b424c4221d66e1281cf19735c00e07f063a337a208c11b5/opencv_python_headless-4.13.0.90-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f46b17ea0aa7e4124ca6ad71143f89233ae9557f61d2326bcdb34329a1ddf9bd", size = 62535926, upload-time = "2026-01-18T09:12:47.229Z" }, + { url = "https://files.pythonhosted.org/packages/06/d2/5e9ee7512306c1caa518be929d1f44bb1c189f342f538f73bea6fb94919f/opencv_python_headless-4.13.0.90-cp37-abi3-win32.whl", hash = "sha256:96060fc57a1abb1144b0b8129e2ff3bfcdd0ccd8e8bd05bd85256ff4ed587d3b", size = 30811665, upload-time = "2026-01-18T09:13:44.517Z" }, + { url = "https://files.pythonhosted.org/packages/a0/09/0a4d832448dccd03b2b1bdee70b9fc2e02c147cc7e06975e9cd729569d90/opencv_python_headless-4.13.0.90-cp37-abi3-win_amd64.whl", hash = "sha256:0e0c8c9f620802fddc4fa7f471a1d263c7b0dca16cd9e7e2f996bb8bd2128c0c", size = 40070035, upload-time = "2026-01-18T09:15:14.652Z" }, +] + [[package]] name = "openpyxl" version = "3.1.5" @@ -1423,6 +1589,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722, upload-time = "2025-09-29T23:20:54.139Z" }, ] +[[package]] +name = "pandas-stubs" +version = "2.3.3.260113" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "types-pytz" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/92/5d/be23854a73fda69f1dbdda7bc10fbd6f930bd1fa87aaec389f00c901c1e8/pandas_stubs-2.3.3.260113.tar.gz", hash = "sha256:076e3724bcaa73de78932b012ec64b3010463d377fa63116f4e6850643d93800", size = 116131, upload-time = "2026-01-13T22:30:16.704Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/c6/df1fe324248424f77b89371116dab5243db7f052c32cc9fe7442ad9c5f75/pandas_stubs-2.3.3.260113-py3-none-any.whl", hash = "sha256:ec070b5c576e1badf12544ae50385872f0631fc35d99d00dc598c2954ec564d3", size = 168246, upload-time = "2026-01-13T22:30:15.244Z" }, +] + [[package]] name = "parso" version = "0.8.5" @@ -1655,6 +1834,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, ] +[[package]] +name = "pydantic-extra-types" +version = "2.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fd/35/2fee58b1316a73e025728583d3b1447218a97e621933fc776fb8c0f2ebdd/pydantic_extra_types-2.11.0.tar.gz", hash = "sha256:4e9991959d045b75feb775683437a97991d02c138e00b59176571db9ce634f0e", size = 157226, upload-time = "2025-12-31T16:18:27.944Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fe/17/fabd56da47096d240dd45ba627bead0333b0cf0ee8ada9bec579287dadf3/pydantic_extra_types-2.11.0-py3-none-any.whl", hash = "sha256:84b864d250a0fc62535b7ec591e36f2c5b4d1325fa0017eb8cda9aeb63b374a6", size = 74296, upload-time = "2025-12-31T16:18:26.38Z" }, +] + [[package]] name = "pygments" version = "2.19.2" @@ -1664,6 +1856,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyogrio" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "numpy" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/d4/12f86b1ed09721363da4c09622464b604c851a9223fc0c6b393fb2012208/pyogrio-0.12.1.tar.gz", hash = "sha256:e548ab705bb3e5383693717de1e6c76da97f3762ab92522cb310f93128a75ff1", size = 303289, upload-time = "2025-11-28T19:04:53.341Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/e0/656b6536549d41b5aec57e0deca1f269b4f17532f0636836f587e581603a/pyogrio-0.12.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:7a0d5ca39184030aec4cde30f4258f75b227a854530d2659babc8189d76e657d", size = 23661857, upload-time = "2025-11-28T19:03:27.744Z" }, + { url = "https://files.pythonhosted.org/packages/14/78/313259e40da728bdb60106ffdc7ea8224d164498cb838ecb79b634aab967/pyogrio-0.12.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:feaff42bbe8087ca0b30e33b09d1ce049ca55fe83ad83db1139ef37d1d04f30c", size = 25237106, upload-time = "2025-11-28T19:03:30.018Z" }, + { url = "https://files.pythonhosted.org/packages/8f/ca/5368571a8b00b941ccfbe6ea29a5566aaffd45d4eb1553b956f7755af43e/pyogrio-0.12.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:81096a5139532de5a8003ef02b41d5d2444cb382a9aecd1165b447eb549180d3", size = 31417048, upload-time = "2025-11-28T19:03:32.572Z" }, + { url = "https://files.pythonhosted.org/packages/ef/85/6eeb875f27bf498d657eb5dab9f58e4c48b36c9037122787abee9a1ba4ba/pyogrio-0.12.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:41b78863f782f7a113ed0d36a5dc74d59735bd3a82af53510899bb02a18b06bb", size = 30952115, upload-time = "2025-11-28T19:03:35.332Z" }, + { url = "https://files.pythonhosted.org/packages/36/f7/cf8bec9024625947e1a71441906f60a5fa6f9e4c441c4428037e73b1fcc8/pyogrio-0.12.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:8b65be8c4258b27cc8f919b21929cecdadda4c353e3637fa30850339ef4d15c5", size = 32537246, upload-time = "2025-11-28T19:03:37.969Z" }, + { url = "https://files.pythonhosted.org/packages/ab/10/7c9f5e428273574e69f217eba3a6c0c42936188ad4dcd9e2c41ebb711188/pyogrio-0.12.1-cp312-cp312-win_amd64.whl", hash = "sha256:1291b866c2c81d991bda15021b08b3621709b40ee3a85689229929e9465788bf", size = 22933980, upload-time = "2025-11-28T19:03:41.047Z" }, +] + [[package]] name = "pyparsing" version = "3.3.2" @@ -1673,6 +1884,26 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] +[[package]] +name = "pyproj" +version = "3.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/04/90/67bd7260b4ea9b8b20b4f58afef6c223ecb3abf368eb4ec5bc2cdef81b49/pyproj-3.7.2.tar.gz", hash = "sha256:39a0cf1ecc7e282d1d30f36594ebd55c9fae1fda8a2622cee5d100430628f88c", size = 226279, upload-time = "2025-08-14T12:05:42.18Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/ab/9893ea9fb066be70ed9074ae543914a618c131ed8dff2da1e08b3a4df4db/pyproj-3.7.2-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:0a9bb26a6356fb5b033433a6d1b4542158fb71e3c51de49b4c318a1dff3aeaab", size = 6219832, upload-time = "2025-08-14T12:04:10.264Z" }, + { url = "https://files.pythonhosted.org/packages/53/78/4c64199146eed7184eb0e85bedec60a4aa8853b6ffe1ab1f3a8b962e70a0/pyproj-3.7.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:567caa03021178861fad27fabde87500ec6d2ee173dd32f3e2d9871e40eebd68", size = 4620650, upload-time = "2025-08-14T12:04:11.978Z" }, + { url = "https://files.pythonhosted.org/packages/b6/ac/14a78d17943898a93ef4f8c6a9d4169911c994e3161e54a7cedeba9d8dde/pyproj-3.7.2-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:c203101d1dc3c038a56cff0447acc515dd29d6e14811406ac539c21eed422b2a", size = 9667087, upload-time = "2025-08-14T12:04:13.964Z" }, + { url = "https://files.pythonhosted.org/packages/b8/be/212882c450bba74fc8d7d35cbd57e4af84792f0a56194819d98106b075af/pyproj-3.7.2-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:1edc34266c0c23ced85f95a1ee8b47c9035eae6aca5b6b340327250e8e281630", size = 9552797, upload-time = "2025-08-14T12:04:16.624Z" }, + { url = "https://files.pythonhosted.org/packages/ba/c0/c0f25c87b5d2a8686341c53c1792a222a480d6c9caf60311fec12c99ec26/pyproj-3.7.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aa9f26c21bc0e2dc3d224cb1eb4020cf23e76af179a7c66fea49b828611e4260", size = 10837036, upload-time = "2025-08-14T12:04:18.733Z" }, + { url = "https://files.pythonhosted.org/packages/5d/37/5cbd6772addde2090c91113332623a86e8c7d583eccb2ad02ea634c4a89f/pyproj-3.7.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f9428b318530625cb389b9ddc9c51251e172808a4af79b82809376daaeabe5e9", size = 10775952, upload-time = "2025-08-14T12:04:20.709Z" }, + { url = "https://files.pythonhosted.org/packages/69/a1/dc250e3cf83eb4b3b9a2cf86fdb5e25288bd40037ae449695550f9e96b2f/pyproj-3.7.2-cp312-cp312-win32.whl", hash = "sha256:b3d99ed57d319da042f175f4554fc7038aa4bcecc4ac89e217e350346b742c9d", size = 5898872, upload-time = "2025-08-14T12:04:22.485Z" }, + { url = "https://files.pythonhosted.org/packages/4a/a6/6fe724b72b70f2b00152d77282e14964d60ab092ec225e67c196c9b463e5/pyproj-3.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:11614a054cd86a2ed968a657d00987a86eeb91fdcbd9ad3310478685dc14a128", size = 6312176, upload-time = "2025-08-14T12:04:24.736Z" }, + { url = "https://files.pythonhosted.org/packages/5d/68/915cc32c02a91e76d02c8f55d5a138d6ef9e47a0d96d259df98f4842e558/pyproj-3.7.2-cp312-cp312-win_arm64.whl", hash = "sha256:509a146d1398bafe4f53273398c3bb0b4732535065fa995270e52a9d3676bca3", size = 6233452, upload-time = "2025-08-14T12:04:27.287Z" }, +] + [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1821,6 +2052,48 @@ dependencies = [ { name = "tenacity" }, ] +[[package]] +name = "rationai-tiling" +version = "1.1.1" +source = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git#475e7938e9a7230b378fca1c67106eda84f69c85" } +dependencies = [ + { name = "mlflow" }, + { name = "numpy" }, + { name = "openslide-python" }, + { name = "pandas" }, + { name = "pyvips" }, + { name = "rationai-masks" }, + { name = "ray" }, + { name = "tqdm" }, +] + +[[package]] +name = "ratiopath" +version = "1.0.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "albumentations" }, + { name = "geopandas" }, + { name = "imagecodecs" }, + { name = "numpy" }, + { name = "ome-types" }, + { name = "openslide-python" }, + { name = "pandas" }, + { name = "pandas-stubs" }, + { name = "pillow" }, + { name = "pyvips" }, + { name = "ray", extra = ["data"] }, + { name = "scikit-image" }, + { name = "shapely" }, + { name = "tifffile" }, + { name = "torch" }, + { name = "zarr" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/71/84b7a927eb9ea410e793011e14257b291a0f8917bd09ff03f46475bf763d/ratiopath-1.0.3.tar.gz", hash = "sha256:d70d6fa8a387422e04e7b1535c2131830f9573cd6d58bd4b3167c3d008637e87", size = 21706, upload-time = "2025-09-29T20:23:52.047Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/f8/802c5f470f87afca0036f36c77067cde560c1225ca04df493ad3722ceee1/ratiopath-1.0.3-py3-none-any.whl", hash = "sha256:cd2a217c261421820b3385c70fa2bdb5b9c9635bff0e5fe7655169cba7ac198d", size = 28040, upload-time = "2025-09-29T20:23:50.957Z" }, +] + [[package]] name = "ray" version = "2.53.0" @@ -1842,6 +2115,14 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8d/6c/bba6f22a9d83ee8f236000ba315f0c197bdc79888b4fa42fd762f729cbbd/ray-2.53.0-cp312-cp312-win_amd64.whl", hash = "sha256:b828c147f9ff2f277b1d254e4fe9a746fdfaee7e313a93a97c7edf4dae9b81a4", size = 27178106, upload-time = "2025-12-20T16:06:45.594Z" }, ] +[package.optional-dependencies] +data = [ + { name = "fsspec" }, + { name = "numpy" }, + { name = "pandas" }, + { name = "pyarrow" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -1945,6 +2226,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] +[[package]] +name = "scikit-image" +version = "0.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "imageio" }, + { name = "lazy-loader" }, + { name = "networkx" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "pillow" }, + { name = "scipy" }, + { name = "tifffile" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/b4/2528bb43c67d48053a7a649a9666432dc307d66ba02e3a6d5c40f46655df/scikit_image-0.26.0.tar.gz", hash = "sha256:f5f970ab04efad85c24714321fcc91613fcb64ef2a892a13167df2f3e59199fa", size = 22729739, upload-time = "2025-12-20T17:12:21.824Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/99/e8/e13757982264b33a1621628f86b587e9a73a13f5256dad49b19ba7dc9083/scikit_image-0.26.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d454b93a6fa770ac5ae2d33570f8e7a321bb80d29511ce4b6b78058ebe176e8c", size = 12376452, upload-time = "2025-12-20T17:10:52.796Z" }, + { url = "https://files.pythonhosted.org/packages/e3/be/f8dd17d0510f9911f9f17ba301f7455328bf13dae416560126d428de9568/scikit_image-0.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3409e89d66eff5734cd2b672d1c48d2759360057e714e1d92a11df82c87cba37", size = 12061567, upload-time = "2025-12-20T17:10:55.207Z" }, + { url = "https://files.pythonhosted.org/packages/b3/2b/c70120a6880579fb42b91567ad79feb4772f7be72e8d52fec403a3dde0c6/scikit_image-0.26.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c717490cec9e276afb0438dd165b7c3072d6c416709cc0f9f5a4c1070d23a44", size = 13084214, upload-time = "2025-12-20T17:10:57.468Z" }, + { url = "https://files.pythonhosted.org/packages/f4/a2/70401a107d6d7466d64b466927e6b96fcefa99d57494b972608e2f8be50f/scikit_image-0.26.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7df650e79031634ac90b11e64a9eedaf5a5e06fcd09bcd03a34be01745744466", size = 13561683, upload-time = "2025-12-20T17:10:59.49Z" }, + { url = "https://files.pythonhosted.org/packages/13/a5/48bdfd92794c5002d664e0910a349d0a1504671ef5ad358150f21643c79a/scikit_image-0.26.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cefd85033e66d4ea35b525bb0937d7f42d4cdcfed2d1888e1570d5ce450d3932", size = 14112147, upload-time = "2025-12-20T17:11:02.083Z" }, + { url = "https://files.pythonhosted.org/packages/ee/b5/ac71694da92f5def5953ca99f18a10fe98eac2dd0a34079389b70b4d0394/scikit_image-0.26.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3f5bf622d7c0435884e1e141ebbe4b2804e16b2dd23ae4c6183e2ea99233be70", size = 14661625, upload-time = "2025-12-20T17:11:04.528Z" }, + { url = "https://files.pythonhosted.org/packages/23/4d/a3cc1e96f080e253dad2251bfae7587cf2b7912bcd76fd43fd366ff35a87/scikit_image-0.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:abed017474593cd3056ae0fe948d07d0747b27a085e92df5474f4955dd65aec0", size = 11911059, upload-time = "2025-12-20T17:11:06.61Z" }, + { url = "https://files.pythonhosted.org/packages/35/8a/d1b8055f584acc937478abf4550d122936f420352422a1a625eef2c605d8/scikit_image-0.26.0-cp312-cp312-win_arm64.whl", hash = "sha256:4d57e39ef67a95d26860c8caf9b14b8fb130f83b34c6656a77f191fa6d1d04d8", size = 11348740, upload-time = "2025-12-20T17:11:09.118Z" }, +] + [[package]] name = "scikit-learn" version = "1.8.0" @@ -1995,6 +2302,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e0/76/f963c61683a39084aa575f98089253e1e852a4417cb8a3a8a422923a5246/setuptools-80.10.1-py3-none-any.whl", hash = "sha256:fc30c51cbcb8199a219c12cc9c281b5925a4978d212f84229c909636d9f6984e", size = 1099859, upload-time = "2026-01-21T09:42:00.688Z" }, ] +[[package]] +name = "shapely" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/4d/bc/0989043118a27cccb4e906a46b7565ce36ca7b57f5a18b78f4f1b0f72d9d/shapely-2.1.2.tar.gz", hash = "sha256:2ed4ecb28320a433db18a5bf029986aa8afcfd740745e78847e330d5d94922a9", size = 315489, upload-time = "2025-09-24T13:51:41.432Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/c0/f3b6453cf2dfa99adc0ba6675f9aaff9e526d2224cbd7ff9c1a879238693/shapely-2.1.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fe2533caae6a91a543dec62e8360fe86ffcdc42a7c55f9dfd0128a977a896b94", size = 1833550, upload-time = "2025-09-24T13:50:30.019Z" }, + { url = "https://files.pythonhosted.org/packages/86/07/59dee0bc4b913b7ab59ab1086225baca5b8f19865e6101db9ebb7243e132/shapely-2.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ba4d1333cc0bc94381d6d4308d2e4e008e0bd128bdcff5573199742ee3634359", size = 1643556, upload-time = "2025-09-24T13:50:32.291Z" }, + { url = "https://files.pythonhosted.org/packages/26/29/a5397e75b435b9895cd53e165083faed5d12fd9626eadec15a83a2411f0f/shapely-2.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0bd308103340030feef6c111d3eb98d50dc13feea33affc8a6f9fa549e9458a3", size = 2988308, upload-time = "2025-09-24T13:50:33.862Z" }, + { url = "https://files.pythonhosted.org/packages/b9/37/e781683abac55dde9771e086b790e554811a71ed0b2b8a1e789b7430dd44/shapely-2.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:1e7d4d7ad262a48bb44277ca12c7c78cb1b0f56b32c10734ec9a1d30c0b0c54b", size = 3099844, upload-time = "2025-09-24T13:50:35.459Z" }, + { url = "https://files.pythonhosted.org/packages/d8/f3/9876b64d4a5a321b9dc482c92bb6f061f2fa42131cba643c699f39317cb9/shapely-2.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e9eddfe513096a71896441a7c37db72da0687b34752c4e193577a145c71736fc", size = 3988842, upload-time = "2025-09-24T13:50:37.478Z" }, + { url = "https://files.pythonhosted.org/packages/d1/a0/704c7292f7014c7e74ec84eddb7b109e1fbae74a16deae9c1504b1d15565/shapely-2.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:980c777c612514c0cf99bc8a9de6d286f5e186dcaf9091252fcd444e5638193d", size = 4152714, upload-time = "2025-09-24T13:50:39.9Z" }, + { url = "https://files.pythonhosted.org/packages/53/46/319c9dc788884ad0785242543cdffac0e6530e4d0deb6c4862bc4143dcf3/shapely-2.1.2-cp312-cp312-win32.whl", hash = "sha256:9111274b88e4d7b54a95218e243282709b330ef52b7b86bc6aaf4f805306f454", size = 1542745, upload-time = "2025-09-24T13:50:41.414Z" }, + { url = "https://files.pythonhosted.org/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" }, +] + +[[package]] +name = "simsimd" +version = "6.5.12" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a4/13/dbcee7d607cbcfdfdf3a0593bec46479ce4e5957b39c5e81333efe540464/simsimd-6.5.12.tar.gz", hash = "sha256:c9b8720c9bc9dcfc36f570c2f96bfd74d1c9e1d0ebeecafc7a130ad3f0affe41", size = 186676, upload-time = "2025-12-21T01:13:38.467Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9f/be/3636d31575a48e75d6a3f52836739bf02f930843a7455ea9515d83a4618f/simsimd-6.5.12-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:63df722f710d9adfa4d4a46772da203d7854b55ca4fd45c3fee149b546e1b56b", size = 105091, upload-time = "2025-12-21T01:11:01.823Z" }, + { url = "https://files.pythonhosted.org/packages/6b/55/cd16b42861c58c52b39da6806b820ed48a817ce966fc9ed4ad5c16543519/simsimd-6.5.12-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec6fcccc99f06e4dff5bd3af8b3d290e5199e7b6414e0c050fa52e5ae2797940", size = 94561, upload-time = "2025-12-21T01:11:03.073Z" }, + { url = "https://files.pythonhosted.org/packages/44/29/019063f8b962f227c8d2dd40e84a074bc4007b0ae55bf8a260648c9d839e/simsimd-6.5.12-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8c780491ea8f927ba6d20dce9d29f1eeab8e0eee1a4306d844a3a1db03ea1a05", size = 384963, upload-time = "2025-12-21T01:11:04.736Z" }, + { url = "https://files.pythonhosted.org/packages/3f/74/c485204fb2a6208059a774d42787462d1be74b1cc51b9c76d9680f7a6ef1/simsimd-6.5.12-cp312-cp312-manylinux2014_i686.manylinux_2_17_i686.manylinux_2_28_i686.whl", hash = "sha256:1b9a2b7b63820e289d0e5df28d9331058c2ae898917faf59f52ff07a47d882a2", size = 274160, upload-time = "2025-12-21T01:11:06.052Z" }, + { url = "https://files.pythonhosted.org/packages/53/12/f28f9afb95e4497759ef5507f1d8f53bc486476c7e2db4a9199d4389779f/simsimd-6.5.12-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e3446d79773525627bf218bf30114dc60599e496c9a9bb02aa61f96b137ecc41", size = 295453, upload-time = "2025-12-21T01:11:07.448Z" }, + { url = "https://files.pythonhosted.org/packages/7f/0b/5b84d21461e5591616dc720ab1ef45b73367ff203860ca575511ad09db31/simsimd-6.5.12-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:559a5c53ac2353281746905c9c2b763db1df5b38ca040740cd50e2fdd32320c9", size = 285482, upload-time = "2025-12-21T01:11:08.762Z" }, + { url = "https://files.pythonhosted.org/packages/c2/90/f66c0f1d87c5d00ecae5774398e5d636c76bdf84d8b7d0e8182c82c37cd1/simsimd-6.5.12-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d7213a87303563b7a82de1c597c604bf018483350ddab93c9c7b9b2b0646b70", size = 582953, upload-time = "2025-12-21T01:11:10.096Z" }, + { url = "https://files.pythonhosted.org/packages/6e/01/0dda71460b7414fbd3f5522dcee7b406d5acc309060c5f146e4d6aff9881/simsimd-6.5.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ba817741a998810381540aa605cc0975a064ff25a12df97113180b4eb6cf6ffa", size = 421266, upload-time = "2025-12-21T01:11:11.46Z" }, + { url = "https://files.pythonhosted.org/packages/f1/9e/1f816cbfdd98b3bc7b2aec866f6c34ed958fe61d38876b9ec83509543b59/simsimd-6.5.12-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:1b4213afc55a8658eb4a4f8af6ae74f3d22166470a3c90b16b2ad1056b3bb368", size = 318311, upload-time = "2025-12-21T01:11:12.789Z" }, + { url = "https://files.pythonhosted.org/packages/58/57/b9245ebfa35e9f0ebf23085ce21330dd24add5560719156d2873a29e4181/simsimd-6.5.12-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:b8a8c15bb3e137bed63c9d212609f0e5453ed52caf5e08b0d18804b5ae706a16", size = 338390, upload-time = "2025-12-21T01:11:14.203Z" }, + { url = "https://files.pythonhosted.org/packages/2b/41/5ec5147f8b20c9dc487770692560135d8003205a415d1ee5cff7309fbba0/simsimd-6.5.12-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:7c72dee3b815b68ac0dc8fc806d45ba4d86985a67f7c96de318dfbe5fe890b51", size = 315824, upload-time = "2025-12-21T01:11:15.563Z" }, + { url = "https://files.pythonhosted.org/packages/94/27/0bc510f629961dd217f5544adf3b7fe209785926119ee9e277da31e9082f/simsimd-6.5.12-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a8b1714c4cd3475e85a70c9b5d89b0c6244dd2e0bdcd12850c9e2b894bce425f", size = 619010, upload-time = "2025-12-21T01:11:17.523Z" }, + { url = "https://files.pythonhosted.org/packages/b7/c5/551b4965982cb440f08f6ba2ae9d6e919cbd6d604962f5f3dffe922bcd8c/simsimd-6.5.12-cp312-cp312-win_amd64.whl", hash = "sha256:baf13245f8b625be0ed440fd67e3d438b2409167992bac09b08dde2019917489", size = 87423, upload-time = "2025-12-21T01:11:19.352Z" }, + { url = "https://files.pythonhosted.org/packages/39/86/7c492c15b304daf5b235b7a82aba0df7c13807aee8bda8ec1666f685e1eb/simsimd-6.5.12-cp312-cp312-win_arm64.whl", hash = "sha256:001c24e6a575223f9fac0860b61eb4b153d399b54d54a6cba619966d113681fc", size = 62864, upload-time = "2025-12-21T01:11:21.054Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -2069,6 +2417,30 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d9/52/1064f510b141bd54025f9b55105e26d1fa970b9be67ad766380a3c9b74b0/starlette-0.50.0-py3-none-any.whl", hash = "sha256:9e5391843ec9b6e472eed1365a78c8098cfceb7a74bfd4d6b1c0c0095efb3bca", size = 74033, upload-time = "2025-11-01T15:25:25.461Z" }, ] +[[package]] +name = "stringzilla" +version = "4.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/61/68/475518f6f4af8273ecd619a5d37d715d36908973f9970faf21571a296821/stringzilla-4.6.0.tar.gz", hash = "sha256:640c0fb5b6a2ad77b7721bff98f00a3c524ca60dc202f552e486831a751d4bbd", size = 646335, upload-time = "2025-12-26T23:44:43.956Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/d3/1ce995ec1efd59904c4787e0fbc3ef18837459b82a32ee4a6c07a10edff6/stringzilla-4.6.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:78210bd526350f305de03de297a96eb9caa52cd6559257e15d0940818f849838", size = 212271, upload-time = "2025-12-26T23:43:07.943Z" }, + { url = "https://files.pythonhosted.org/packages/07/bb/272843655659a0604e32727fdd3d490a11206d7d0ef50f3f0dc6b582ce74/stringzilla-4.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5566c46fb89c2885a5e9abaaf94f2e3b632c2b7f30824ed2f4d3a31615b39bfa", size = 199353, upload-time = "2025-12-26T23:43:09.088Z" }, + { url = "https://files.pythonhosted.org/packages/33/ca/cb61f293a919fa04bb1b7a2672ea35c69df273fe5aa66410646c50bfc948/stringzilla-4.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83a7545b65c8ebc4182d4a6e494995affd4a43871e7cc89d951aa56854a576fb", size = 689232, upload-time = "2025-12-26T23:43:10.923Z" }, + { url = "https://files.pythonhosted.org/packages/20/e3/ec0e5332975a213ef3456acd18d86de076d86026043b7aa3ccba23cec33d/stringzilla-4.6.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5cff924b1ff38cacd05bb9426675dbbd4ce9c12354978fb33922e316a5be4f8c", size = 657168, upload-time = "2025-12-26T23:43:12.733Z" }, + { url = "https://files.pythonhosted.org/packages/41/70/34af64c767656c23b8d0efdd5a783124f3b1067ff57992640f38f439e109/stringzilla-4.6.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.manylinux_2_28_s390x.whl", hash = "sha256:cf83c6c1eabed2704cfb884c8f07ddaebd8d7cddcfef6a4bf76c1a6cad381b2f", size = 640477, upload-time = "2025-12-26T23:43:14.012Z" }, + { url = "https://files.pythonhosted.org/packages/f2/c8/fd7101beb8268d231afe3beb20038b17543ab7ea2a59f56eded1782e420f/stringzilla-4.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5d01f2bc4e0ee2d1c444ece06b18016f00e16f7b50491fe81984fa9ac584caa5", size = 2053831, upload-time = "2025-12-26T23:43:15.717Z" }, + { url = "https://files.pythonhosted.org/packages/0f/31/6e3734d402a17077d91c9ff0bebba6168ac98a71baa06c555521e38cc938/stringzilla-4.6.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78b16030d3c554860ec1738844bc8f06a278c76669957287e42195fe0fd4cfb1", size = 643989, upload-time = "2025-12-26T23:43:17.217Z" }, + { url = "https://files.pythonhosted.org/packages/07/c2/b5951bd07abe255f5e3018b52ccf3a31d166e6c91934fba6d7210b9efa3c/stringzilla-4.6.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:97062003478a73c5263b1c91cc28a2147469fccf3c9d8c042e4394b8af261446", size = 653523, upload-time = "2025-12-26T23:43:18.444Z" }, + { url = "https://files.pythonhosted.org/packages/60/a6/af547d8b2695f93f08b324a9dde6e9bed07a1bbf7f9123427dfc48458a08/stringzilla-4.6.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:07db5994130efc77b93ff44982f37b8cd1bffaa6d51033b0d7d37ceb211446d2", size = 586796, upload-time = "2025-12-26T23:43:19.736Z" }, + { url = "https://files.pythonhosted.org/packages/f4/c0/d9bc41fab3b1261352a80bc8f9ef189c246817bcc66af17b6a6fca27c7fc/stringzilla-4.6.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f2c706cb6cd522d406874688aaef964eb60e8b24ab027bd7c21fa129f6308a6", size = 626102, upload-time = "2025-12-26T23:43:20.999Z" }, + { url = "https://files.pythonhosted.org/packages/c8/c2/df732da9f8cafebfb58db0d9167211c09143f3858d259d1d7ecb4d66f87b/stringzilla-4.6.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5b14cca3091fb50f56710b3cc5f95830f6f64a44d72739b3dabff2052722dd45", size = 618774, upload-time = "2025-12-26T23:43:22.528Z" }, + { url = "https://files.pythonhosted.org/packages/f8/a1/5102428c3285d30d8ae647168450749be3ff0309ac34ae71c8eb72ebb420/stringzilla-4.6.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:88817ba207dc28412ed3e521d0fc4925e86e240f3dc34c5fff62808b01936f1c", size = 613122, upload-time = "2025-12-26T23:43:23.808Z" }, + { url = "https://files.pythonhosted.org/packages/13/9b/45714783635d0b13caf2775d2a7a71f05874077ae08c4cbd3aba56e015c6/stringzilla-4.6.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1d0fef232f918fa93b1b8b64e9c06831cfbe00aef5289c3d4792f219a0f2947a", size = 1909235, upload-time = "2025-12-26T23:43:25.056Z" }, + { url = "https://files.pythonhosted.org/packages/e1/c0/d84f8bcaff6831905e54eba0a0cd9b316c0393d453a8a50b81d7937a0f5a/stringzilla-4.6.0-cp312-cp312-win32.whl", hash = "sha256:d09f6f0ba17dff5cff7024a29cbb74d8239f3c6692aa706a712cf47f65b39d24", size = 114751, upload-time = "2025-12-26T23:43:26.645Z" }, + { url = "https://files.pythonhosted.org/packages/b4/f6/16981b49f2267e1f39922379125d44134d9326b92f1c044232856a9e1a50/stringzilla-4.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:5bcf791ed67570cc1a268da45710796c891819b3e96c14bc3d1a81f388c6e0ee", size = 162427, upload-time = "2025-12-26T23:43:27.826Z" }, + { url = "https://files.pythonhosted.org/packages/35/70/77f31fc6f0a935b61eca735fc11f188ed9d5cd70bbc178b17d50a86ebfac/stringzilla-4.6.0-cp312-cp312-win_arm64.whl", hash = "sha256:c0d11a145a455d73f9bc718295c41611ff55a777a38119a84f47d4b0eaea6df3", size = 123343, upload-time = "2025-12-26T23:43:29.466Z" }, +] + [[package]] name = "sympy" version = "1.14.0" @@ -2099,6 +2471,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/32/d5/f9a850d79b0851d1d4ef6456097579a9005b31fea68726a4ae5f2d82ddd9/threadpoolctl-3.6.0-py3-none-any.whl", hash = "sha256:43a0b8fd5a2928500110039e43a5eed8480b918967083ea48dc3ab9f13c4a7fb", size = 18638, upload-time = "2025-03-13T13:49:21.846Z" }, ] +[[package]] +name = "tifffile" +version = "2026.1.14" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5d/19/a41ab0dc1b314da952d99957289944c3b8b76021399c72693e4c1fddc6c3/tifffile-2026.1.14.tar.gz", hash = "sha256:a423c583e1eecd9ca255642d47f463efa8d7f2365a0e110eb0167570493e0c8c", size = 373639, upload-time = "2026-01-14T22:40:43.551Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/4d/3fd60d3a37b544cb59463add86e4dfbb485880225115341281906a7b140e/tifffile-2026.1.14-py3-none-any.whl", hash = "sha256:29cf4adb43562a4624fc959018ab1b44e0342015d3db4581b983fe40e05f5924", size = 232213, upload-time = "2026-01-14T22:40:41.553Z" }, +] + [[package]] name = "torch" version = "2.10.0" @@ -2199,6 +2583,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, ] +[[package]] +name = "types-pytz" +version = "2025.2.0.20251108" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/40/ff/c047ddc68c803b46470a357454ef76f4acd8c1088f5cc4891cdd909bfcf6/types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb", size = 10961, upload-time = "2025-11-08T02:55:57.001Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c", size = 10116, upload-time = "2025-11-08T02:55:56.194Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -2244,6 +2637,8 @@ dependencies = [ { name = "rationai-masks" }, { name = "rationai-mlkit" }, { name = "rationai-sdk" }, + { name = "rationai-tiling" }, + { name = "ratiopath" }, { name = "ray" }, { name = "torch" }, { name = "torchmetrics" }, @@ -2271,6 +2666,8 @@ requires-dist = [ { name = "rationai-masks", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/masks.git" }, { name = "rationai-mlkit", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }, { name = "rationai-sdk", git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" }, + { name = "rationai-tiling", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" }, + { name = "ratiopath", specifier = ">=1.0.3" }, { name = "ray", specifier = ">=2.52.1" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, @@ -2345,6 +2742,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025, upload-time = "2026-01-08T17:49:21.859Z" }, ] +[[package]] +name = "xsdata" +version = "26.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/24/9f/c1b0fa54f2b3f43989015668daddd8bd50d3bf3461ba8b79fdeb8000b27d/xsdata-26.1.tar.gz", hash = "sha256:dcae2c0e5f329f1b4e09a2d148c96941c9556616bb5e3418970a63c5eb2cd831", size = 348523, upload-time = "2026-01-19T19:03:54.347Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7b/48/37775c15adba3070f0af934a6617696a608a75f2e01be37a454e298cf32c/xsdata-26.1-py3-none-any.whl", hash = "sha256:aa02adf1b75668e7b685f4de3c4507e4aa31af3f2e2210fd45aa2f0e4a637a99", size = 235422, upload-time = "2026-01-19T19:03:52.684Z" }, +] + [[package]] name = "yarl" version = "1.22.0" @@ -2375,6 +2784,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/73/ae/b48f95715333080afb75a4504487cbe142cae1268afc482d06692d605ae6/yarl-1.22.0-py3-none-any.whl", hash = "sha256:1380560bdba02b6b6c90de54133c81c9f2a453dee9912fe58c1dcced1edb7cff", size = 46814, upload-time = "2025-10-06T14:12:53.872Z" }, ] +[[package]] +name = "zarr" +version = "3.1.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "donfig" }, + { name = "google-crc32c" }, + { name = "numcodecs" }, + { name = "numpy" }, + { name = "packaging" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/fc/76/7fa87f57c112c7b9c82f0a730f8b6f333e792574812872e2cd45ab604199/zarr-3.1.5.tar.gz", hash = "sha256:fbe0c79675a40c996de7ca08e80a1c0a20537bd4a9f43418b6d101395c0bba2b", size = 366825, upload-time = "2025-11-21T14:06:01.492Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/44/15/bb13b4913ef95ad5448490821eee4671d0e67673342e4d4070854e5fe081/zarr-3.1.5-py3-none-any.whl", hash = "sha256:29cd905afb6235b94c09decda4258c888fcb79bb6c862ef7c0b8fe009b5c8563", size = 284067, upload-time = "2025-11-21T14:05:59.235Z" }, +] + [[package]] name = "zipp" version = "3.23.0" From e88bfd868e048d5be6db90fef1a1987ac855a80b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 17:24:52 +0000 Subject: [PATCH 07/42] fix: naming --- preprocessing/quality_control.py | 6 +++--- preprocessing/tissue_masks.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/preprocessing/quality_control.py b/preprocessing/quality_control.py index bce3912..96b8c13 100644 --- a/preprocessing/quality_control.py +++ b/preprocessing/quality_control.py @@ -90,7 +90,7 @@ async def qc_main( logger.log_artifacts(local_dir=output_path) -def download_dataframe(uri: str) -> pd.DataFrame: +def download_dataset(uri: str) -> pd.DataFrame: path = mlflow.artifacts.download_artifacts(artifact_uri=uri) df = pd.read_csv(path) return df @@ -100,7 +100,7 @@ def download_dataframe(uri: str) -> pd.DataFrame: @hydra.main(config_path="../configs", config_name="preprocessing", version_base=None) @autolog def main(config: DictConfig, logger: MLFlowLogger) -> None: - df = download_dataframe(config.dataset.uri) + dataset = download_dataset(config.dataset.uri) output_path = Path(config.output_dir) output_path.mkdir(parents=True, exist_ok=True) @@ -108,7 +108,7 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: asyncio.run( qc_main( output_path=output_path.absolute().as_posix(), - slides=df["path"].to_list(), + slides=dataset["path"].to_list(), logger=logger, request_timeout=config.request_timeout, max_concurrent=config.max_concurrent, diff --git a/preprocessing/tissue_masks.py b/preprocessing/tissue_masks.py index 06efd2d..45285e3 100644 --- a/preprocessing/tissue_masks.py +++ b/preprocessing/tissue_masks.py @@ -34,7 +34,7 @@ def process_slide(slide_path: str, level: int, output_path: Path) -> None: write_big_tiff(mask, path=mask_path, mpp_x=mpp_x, mpp_y=mpp_y) -def download_dataframe(uri: str) -> pd.DataFrame: +def download_dataset(uri: str) -> pd.DataFrame: path = mlflow.artifacts.download_artifacts(artifact_uri=uri) df = pd.read_csv(path) return df @@ -44,11 +44,11 @@ def download_dataframe(uri: str) -> pd.DataFrame: @hydra.main(config_path="../configs", config_name="preprocessing", version_base=None) @autolog def main(config: DictConfig, logger: MLFlowLogger) -> None: - df = download_dataframe(config.dataset.uri) + dataset = download_dataset(config.dataset.uri) with TemporaryDirectory() as output_dir: process_items( - df["path"].to_list(), + dataset["path"].to_list(), process_item=process_slide, fn_kwargs={ "level": config.level, From b91d36d0cd914262c251b08c6fd4bcefd733d520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 17:32:14 +0000 Subject: [PATCH 08/42] feat: confs --- .../preprocessing/tiling/knl_patos_224px.yaml | 17 +++++++++++++ .../preprocessing/tiling/knl_patos_320px.yaml | 17 +++++++++++++ .../preprocessing/tiling/knl_patos_75um.yaml | 17 +++++++++++++ configs/preprocessing/tiling.yaml | 24 +++++++++++++++++++ 4 files changed, 75 insertions(+) create mode 100644 configs/experiment/preprocessing/tiling/knl_patos_224px.yaml create mode 100644 configs/experiment/preprocessing/tiling/knl_patos_320px.yaml create mode 100644 configs/experiment/preprocessing/tiling/knl_patos_75um.yaml create mode 100644 configs/preprocessing/tiling.yaml diff --git a/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml new file mode 100644 index 0000000..ace2f42 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml @@ -0,0 +1,17 @@ +# @package _global_ + +defaults: + - dataset: /data/processed/knl_patos + - _self_ + +mpp: 1.55 # level 2 +tile_extent: 224 +stride: 112 + +tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks +qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml new file mode 100644 index 0000000..48d6783 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml @@ -0,0 +1,17 @@ +# @package _global_ + +defaults: + - dataset: /data/processed/knl_patos + - _self_ + +mpp: 0.17 # level 0 +tile_extent: 320 +stride: 160 + +tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks +qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml b/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml new file mode 100644 index 0000000..5db230b --- /dev/null +++ b/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml @@ -0,0 +1,17 @@ +# @package _global_ + +defaults: + - dataset: /data/processed/knl_patos + - _self_ + +mpp: 0.17 # level 0 +tile_extent: 430 # 75 / 0.17 ≈ 430 +stride: 215 + +tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks +qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/preprocessing/tiling.yaml b/configs/preprocessing/tiling.yaml new file mode 100644 index 0000000..2ac485c --- /dev/null +++ b/configs/preprocessing/tiling.yaml @@ -0,0 +1,24 @@ +# @package _global_ + +mpp: ??? +tile_extent: ??? +stride: ??? + +tissue_mask_uri: ??? +qc_mask_uri: ??? + +tissue_threshold: 0.5 + +splits: + train: ??? + test_preliminary: ??? + test_final: ??? + +metadata: + run_name: "🧱 Tiling: ${dataset.institution} ${tile_extent}" + description: Tile extraction for ${dataset.institution} institution with tile extent ${tile_extent} + hyperparams: + mpp: ${mpp} + tile_extent: ${tile_extent} + stride: ${stride} + tissue_threshold: ${tissue_threshold} From 413db4f9690dc51f83904733984e1a1c5888daa7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 17:32:24 +0000 Subject: [PATCH 09/42] feat: tiling --- preprocessing/tiling.py | 227 ++++++++++++++++++++++++++++++++ scripts/preprocessing/tiling.py | 20 +++ 2 files changed, 247 insertions(+) create mode 100644 preprocessing/tiling.py create mode 100644 scripts/preprocessing/tiling.py diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py new file mode 100644 index 0000000..d864295 --- /dev/null +++ b/preprocessing/tiling.py @@ -0,0 +1,227 @@ +from math import isclose +from pathlib import Path +from typing import Any, TypedDict, cast + +import hydra +import mlflow.artifacts +import pandas as pd +import ray +from omegaconf import DictConfig +from rationai.mlkit import with_cli_args +from rationai.mlkit.autolog import autolog +from rationai.mlkit.lightning.loggers import MLFlowLogger +from rationai.tiling.writers import save_mlflow_dataset +from ratiopath.ray import read_slides +from ratiopath.tiling import grid_tiles, relative_tile_overlay, tile_overlay +from ratiopath.tiling.utils import row_hash +from sklearn.model_selection import train_test_split + + +ray.init(runtime_env={"excludes": [".git", ".venv"]}) + + +QC_SUBFOLDERS = {"blur": "blur_per_pixel", "artifacts": "artifacts_per_pixel"} + + +class _RayCpuResources(TypedDict): + num_cpus: float + + +class _RayMemResources(TypedDict): + memory: int + + +LO_CPU: _RayCpuResources = {"num_cpus": 0.1} +HI_CPU: _RayCpuResources = {"num_cpus": 0.2} +LO_MEM: _RayMemResources = {"memory": 128 * 1024**2} +HI_MEM: _RayMemResources = {"memory": 1024**3} + + +def download_dataset(uri: str) -> pd.DataFrame: + path = mlflow.artifacts.download_artifacts(artifact_uri=uri) + df = pd.read_csv(path) + return df + + +def split_dataset( + dataset: pd.DataFrame, splits: dict[str, float], random_state: int = 42 +) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: + assert isclose( + splits["train"] + splits["preliminary_test"] + splits["final_test"], 1.0 + ), "Splits must sum to 1.0" + + train: pd.DataFrame + test: pd.DataFrame + test_preliminary: pd.DataFrame + test_final: pd.DataFrame + + if splits["train"] == 0.0: + train = pd.DataFrame(columns=dataset.columns) + test = dataset + else: + train, test = train_test_split( + dataset, + train_size=splits["train"], + stratify=dataset["nancy"], + random_state=random_state, + ) + + if splits["preliminary_test"] == 0.0: + test_preliminary = pd.DataFrame(columns=test.columns) + test_final = test + else: + preliminary_size = splits["preliminary_test"] / (1.0 - splits["train"]) + test_preliminary, test_final = train_test_split( + test, + train_size=preliminary_size, + stratify=test["nancy"], + random_state=random_state, + ) + + return train, test_preliminary, test_final + + +def nancy(row: dict[str, Any], df: pd.DataFrame) -> dict[str, Any]: + row["nancy_index"] = df.loc[Path(row["path"]).stem, "nancy"] + return row + + +def qc_agg(row: dict[str, Any], df: pd.DataFrame) -> dict[str, Any]: + qc_df = cast("pd.Series", df.loc[Path(row["path"]).stem]) + + row["blur_mean"] = qc_df["mean_coverage(Piqe)"] + row["artifacts_mean"] = qc_df["mean_coverage(ResidualArtifactsAndCoverage)"] + + return row + + +def tile(row: dict[str, Any]) -> list[dict[str, Any]]: + return [ + { + "tile_x": x, + "tile_y": y, + "path": row["path"], + "slide_id": row["id"], + "level": row["level"], + "tile_extent_x": row["tile_extent_x"], + "tile_extent_y": row["tile_extent_y"], + "mpp_x": row["mpp_x"], + "mpp_y": row["mpp_y"], + } + for x, y in grid_tiles( + slide_extent=(row["extent_x"], row["extent_y"]), + tile_extent=(row["tile_extent_x"], row["tile_extent_y"]), + stride=(row["stride_x"], row["stride_y"]), + ) + ] + + +def tissue(row: dict[str, Any], tissue_folder: Path) -> dict[str, Any]: + tissue_file = tissue_folder / Path(row["path"]).with_suffix(".tiff").name + overlay = relative_tile_overlay( + tissue_file, + (row["mpp_x"], row["mpp_y"]), + (row["tile_x"], row["tile_y"]), + (row["tile_extent_x"] // 4, row["tile_extent_y"] // 4), + (row["tile_extent_x"] // 2, row["tile_extent_y"] // 2), + ) + + row["tissue"] = (overlay == 255).sum() / overlay.size + return row + + +def filter_tissue(row: dict[str, Any], threshold: float) -> bool: + return row["tissue"] >= threshold + + +def qc(row: dict[str, Any], qc_folder: Path) -> dict[str, Any]: + for qc_key, subfolder in QC_SUBFOLDERS.items(): + qc_file = qc_folder / subfolder / Path(row["path"]).with_suffix(".tiff").name + overlay = tile_overlay( + qc_file, + (row["mpp_x"], row["mpp_y"]), + (row["tile_x"], row["tile_y"]), + (row["tile_extent_x"], row["tile_extent_y"]), + ) + + row[qc_key] = overlay.mean() / 255.0 + + return row + + +def select(row: dict[str, Any]) -> dict[str, Any]: + return { + "slide_id": row["slide_id"], + "x": row["tile_x"], + "y": row["tile_y"], + "tissue": row["tissue"], + "blur": row["blur"], + "artifacts": row["artifacts"], + } + + +def tiling( + df: pd.DataFrame, + qc_mask_uri: str, + tissue_mask_uri: str, + tile_extent: int, + stride: int, + mpp: float, + tissue_threshold: float, +) -> tuple[pd.DataFrame, pd.DataFrame]: + qc_folder = Path(mlflow.artifacts.download_artifacts(qc_mask_uri)) + qc_df = pd.read_csv(qc_folder / "qc_metrics.csv", index_col="slide_name") + tissue_folder = Path(mlflow.artifacts.download_artifacts(tissue_mask_uri)) + paths = df["slide_path"].tolist() + + slides = ( + read_slides(paths, tile_extent=tile_extent, stride=stride, mpp=mpp) + .map(row_hash, **LO_CPU, **LO_MEM) + .map(nancy, fn_args=(df,), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] + .map(qc_agg, fn_args=(qc_df,), **HI_CPU, **LO_MEM) # type: ignore[reportArgumentType] + ) + + tiles = ( + slides.flat_map(tile, **HI_CPU, **LO_MEM) + .repartition(target_num_rows_per_block=4096) + .map(tissue, fn_args=(tissue_folder,), **HI_CPU, **HI_MEM) # type: ignore[reportArgumentType] + .filter(filter_tissue, fn_args=(tissue_threshold,), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] + .map(qc, fn_args=(qc_folder,), **HI_CPU, **HI_MEM) # type: ignore[reportArgumentType] + .map(select, **LO_CPU, **LO_MEM) + ) + + return slides.to_pandas(), tiles.to_pandas() + + +@with_cli_args(["+preprocessing=tiling"]) +@hydra.main(config_path="../configs", config_name="preprocessing", version_base=None) +@autolog +def main(config: DictConfig, logger: MLFlowLogger) -> None: + dataset = download_dataset(config.dataset.uri) + + train, test_preliminary, test_final = split_dataset(dataset, config.splits) + + for df, name in [ + (train, "train"), + (test_preliminary, "test preliminary"), + (test_final, "test final"), + ]: + if df.empty: + continue + + df_slides, df_tiles = tiling( + df, + qc_mask_uri=config.qc_mask.uri, + tissue_mask_uri=config.tissue_mask.uri, + tile_extent=config.tile_extent, + stride=config.stride, + mpp=config.mpp, + tissue_threshold=config.tissue_threshold, + ) + save_mlflow_dataset( + df_slides, df_tiles, f"{name} - {config.dataset.institution}" + ) + + +if __name__ == "__main__": + main() diff --git a/scripts/preprocessing/tiling.py b/scripts/preprocessing/tiling.py new file mode 100644 index 0000000..61b8048 --- /dev/null +++ b/scripts/preprocessing/tiling.py @@ -0,0 +1,20 @@ +from kube_jobs import storage, submit_job + + +COHORT = "ikem" # "ikem", "ftn", or "knl_patos" +TILE_EXTENT = "224px" # "224px", "320px", or "75um" + +submit_job( + job_name=f"ulcerative-colitis-tiling-{COHORT.replace('_', '-')}-{TILE_EXTENT}", + username=..., + public=False, + cpu=64, + memory="128Gi", + script=[ + "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", + "cd workdir", + "uv sync --frozen", + f"uv run --active -m preprocessing.tiling +experiment=preprocessing/tiling/{COHORT}_{TILE_EXTENT}", + ], + storage=[storage.secure.DATA], +) From f5b4045b670c6e4013113ede3929393387b0c6c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 17:38:45 +0000 Subject: [PATCH 10/42] fix: confs --- configs/experiment/preprocessing/tiling/knl_patos_224px.yaml | 2 +- configs/experiment/preprocessing/tiling/knl_patos_320px.yaml | 2 +- configs/experiment/preprocessing/tiling/knl_patos_75um.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml index ace2f42..e5f203f 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - dataset: /data/processed/knl_patos + - /data/processed/knl_patos - _self_ mpp: 1.55 # level 2 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml index 48d6783..e535f72 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - dataset: /data/processed/knl_patos + - /data/processed/knl_patos - _self_ mpp: 0.17 # level 0 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml b/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml index 5db230b..0c59072 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - dataset: /data/processed/knl_patos + - /data/processed/knl_patos - _self_ mpp: 0.17 # level 0 From 2a578d115f8fcca086395066c4ebd98da39b6594 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 17:43:59 +0000 Subject: [PATCH 11/42] fix: typo --- preprocessing/tiling.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index d864295..7e278d7 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -47,7 +47,7 @@ def split_dataset( dataset: pd.DataFrame, splits: dict[str, float], random_state: int = 42 ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: assert isclose( - splits["train"] + splits["preliminary_test"] + splits["final_test"], 1.0 + splits["train"] + splits["test_preliminary"] + splits["test_final"], 1.0 ), "Splits must sum to 1.0" train: pd.DataFrame @@ -66,11 +66,11 @@ def split_dataset( random_state=random_state, ) - if splits["preliminary_test"] == 0.0: + if splits["test_preliminary"] == 0.0: test_preliminary = pd.DataFrame(columns=test.columns) test_final = test else: - preliminary_size = splits["preliminary_test"] / (1.0 - splits["train"]) + preliminary_size = splits["test_preliminary"] / (1.0 - splits["train"]) test_preliminary, test_final = train_test_split( test, train_size=preliminary_size, From 9262f3ec0aa6660be487353e23b555f683465161 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 17:47:44 +0000 Subject: [PATCH 12/42] fix: typo --- preprocessing/tiling.py | 4 ++-- scripts/preprocessing/tiling.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 7e278d7..dc4e3ad 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -211,8 +211,8 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: df_slides, df_tiles = tiling( df, - qc_mask_uri=config.qc_mask.uri, - tissue_mask_uri=config.tissue_mask.uri, + qc_mask_uri=config.qc_mask_uri, + tissue_mask_uri=config.tissue_mask_uri, tile_extent=config.tile_extent, stride=config.stride, mpp=config.mpp, diff --git a/scripts/preprocessing/tiling.py b/scripts/preprocessing/tiling.py index 61b8048..cbc3bdd 100644 --- a/scripts/preprocessing/tiling.py +++ b/scripts/preprocessing/tiling.py @@ -10,6 +10,7 @@ public=False, cpu=64, memory="128Gi", + shm="48Gi", script=[ "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", "cd workdir", From b852be9944542c7845369b73d997a02d9cd7ca39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 17:50:14 +0000 Subject: [PATCH 13/42] fix: typo --- preprocessing/tiling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index dc4e3ad..17a7fcb 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -172,7 +172,7 @@ def tiling( qc_folder = Path(mlflow.artifacts.download_artifacts(qc_mask_uri)) qc_df = pd.read_csv(qc_folder / "qc_metrics.csv", index_col="slide_name") tissue_folder = Path(mlflow.artifacts.download_artifacts(tissue_mask_uri)) - paths = df["slide_path"].tolist() + paths = df["path"].tolist() slides = ( read_slides(paths, tile_extent=tile_extent, stride=stride, mpp=mpp) From bd8ca9f1af413a51d4e73c23539f2b3519cd026b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 18:18:18 +0000 Subject: [PATCH 14/42] fix: dataset index --- preprocessing/tiling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 17a7fcb..ac2cb11 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -39,7 +39,7 @@ class _RayMemResources(TypedDict): def download_dataset(uri: str) -> pd.DataFrame: path = mlflow.artifacts.download_artifacts(artifact_uri=uri) - df = pd.read_csv(path) + df = pd.read_csv(path, index_col="slide_id") return df From 08d33207aaa92deff4baeb8b8ef38c6bdfaba00b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 21:28:59 +0000 Subject: [PATCH 15/42] feat: update tiling to latest ratiopath --- preprocessing/tiling.py | 51 ++++++++++-------- pyproject.toml | 2 +- uv.lock | 116 +++++++++++++++++----------------------- 3 files changed, 79 insertions(+), 90 deletions(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index ac2cb11..39db64c 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -12,8 +12,10 @@ from rationai.mlkit.lightning.loggers import MLFlowLogger from rationai.tiling.writers import save_mlflow_dataset from ratiopath.ray import read_slides -from ratiopath.tiling import grid_tiles, relative_tile_overlay, tile_overlay +from ratiopath.tiling import grid_tiles, tile_overlay_overlap from ratiopath.tiling.utils import row_hash +from shapely import Polygon +from shapely.geometry import box from sklearn.model_selection import train_test_split @@ -95,6 +97,16 @@ def qc_agg(row: dict[str, Any], df: pd.DataFrame) -> dict[str, Any]: return row +def create_tissue_roi(tile_extent: int) -> Polygon: + offset = tile_extent // 4 + size = tile_extent // 2 + return box(offset, offset, offset + size, offset + size) + + +def create_qc_roi(tile_extent: int) -> Polygon: + return box(0, 0, tile_extent, tile_extent) + + def tile(row: dict[str, Any]) -> list[dict[str, Any]]: return [ { @@ -116,17 +128,13 @@ def tile(row: dict[str, Any]) -> list[dict[str, Any]]: ] -def tissue(row: dict[str, Any], tissue_folder: Path) -> dict[str, Any]: +def tissue(row: dict[str, Any], tissue_folder: Path, roi: Polygon) -> dict[str, Any]: tissue_file = tissue_folder / Path(row["path"]).with_suffix(".tiff").name - overlay = relative_tile_overlay( - tissue_file, - (row["mpp_x"], row["mpp_y"]), - (row["tile_x"], row["tile_y"]), - (row["tile_extent_x"] // 4, row["tile_extent_y"] // 4), - (row["tile_extent_x"] // 2, row["tile_extent_y"] // 2), - ) + overlap = tile_overlay_overlap( + roi, tissue_file, row["tile_x"], row["tile_y"], row["mpp_x"], row["mpp_y"] + ) # type: ignore[reportCallIssue] - row["tissue"] = (overlay == 255).sum() / overlay.size + row["tissue"] = 1.0 - overlap.get("0", 0) return row @@ -134,17 +142,13 @@ def filter_tissue(row: dict[str, Any], threshold: float) -> bool: return row["tissue"] >= threshold -def qc(row: dict[str, Any], qc_folder: Path) -> dict[str, Any]: +def qc(row: dict[str, Any], qc_folder: Path, roi: Polygon) -> dict[str, Any]: for qc_key, subfolder in QC_SUBFOLDERS.items(): qc_file = qc_folder / subfolder / Path(row["path"]).with_suffix(".tiff").name - overlay = tile_overlay( - qc_file, - (row["mpp_x"], row["mpp_y"]), - (row["tile_x"], row["tile_y"]), - (row["tile_extent_x"], row["tile_extent_y"]), - ) - - row[qc_key] = overlay.mean() / 255.0 + overlap = tile_overlay_overlap( + roi, qc_file, row["tile_x"], row["tile_y"], row["mpp_x"], row["mpp_y"] + ) # type: ignore[reportCallIssue] + row[qc_key] = 1.0 - overlap.get("0", 0) return row @@ -181,12 +185,15 @@ def tiling( .map(qc_agg, fn_args=(qc_df,), **HI_CPU, **LO_MEM) # type: ignore[reportArgumentType] ) + tissue_roi = create_tissue_roi(tile_extent) + qc_roi = create_qc_roi(tile_extent) + tiles = ( slides.flat_map(tile, **HI_CPU, **LO_MEM) - .repartition(target_num_rows_per_block=4096) - .map(tissue, fn_args=(tissue_folder,), **HI_CPU, **HI_MEM) # type: ignore[reportArgumentType] + .repartition(target_num_rows_per_block=128) + .map(tissue, fn_args=(tissue_folder, tissue_roi), **HI_CPU, **HI_MEM) # type: ignore[reportArgumentType] .filter(filter_tissue, fn_args=(tissue_threshold,), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] - .map(qc, fn_args=(qc_folder,), **HI_CPU, **HI_MEM) # type: ignore[reportArgumentType] + .map(qc, fn_args=(qc_folder, qc_roi), **HI_CPU, **HI_MEM) # type: ignore[reportArgumentType] .map(select, **LO_CPU, **LO_MEM) ) diff --git a/pyproject.toml b/pyproject.toml index 3af5043..6933a1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "ray>=2.52.1", "torch>=2.9.0", "torchmetrics>=1.8.2", - "ratiopath>=1.0.3", + "ratiopath>=1.0.4", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index b05afd4..f61e800 100644 --- a/uv.lock +++ b/uv.lock @@ -6,6 +6,15 @@ resolution-markers = [ "sys_platform == 'win32'", ] +[[package]] +name = "affine" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/98/d2f0bb06385069e799fc7d2870d9e078cfa0fa396dc8a2b81227d0da08b9/affine-2.4.0.tar.gz", hash = "sha256:a24d818d6a836c131976d22f8c27b8d3ca32d0af64c1d8d29deb7bafa4da1eea", size = 17132, upload-time = "2023-01-19T23:44:30.696Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/f7/85273299ab57117850cc0a936c64151171fac4da49bc6fba0dad984a7c5f/affine-2.4.0-py3-none-any.whl", hash = "sha256:8a3df80e2b2378aef598a83c1392efd47967afec4242021a0b06b4c7cbc61a92", size = 15662, upload-time = "2023-01-19T23:44:28.833Z" }, +] + [[package]] name = "aiohappyeyeballs" version = "2.6.1" @@ -259,6 +268,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, ] +[[package]] +name = "cligj" +version = "0.7.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/0d/837dbd5d8430fd0f01ed72c4cfb2f548180f4c68c635df84ce87956cff32/cligj-0.7.2.tar.gz", hash = "sha256:a4bc13d623356b373c2c27c53dbd9c68cae5d526270bfa71f6c6fa69669c6b27", size = 9803, upload-time = "2021-05-28T21:23:27.935Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/86/43fa9f15c5b9fb6e82620428827cd3c284aa933431405d1bcf5231ae3d3e/cligj-0.7.2-py3-none-any.whl", hash = "sha256:c1ca117dbce1fe20a5809dc96f01e1c2840f6dcc939b3ddbb1111bf330ba82df", size = 7069, upload-time = "2021-05-28T21:23:26.877Z" }, +] + [[package]] name = "cloudpickle" version = "3.1.2" @@ -1447,20 +1468,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, ] -[[package]] -name = "ome-types" -version = "0.6.3" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "pydantic-extra-types" }, - { name = "xsdata" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/48/4c/d252c1619c733eec9b4d2d21fe369fd21a2594954b396bf4352edea1e272/ome_types-0.6.3.tar.gz", hash = "sha256:eef4138cda5edfdcb2a44cfb90b714a59ead1b69e4c5ce5f9892ad397ccaaa68", size = 121784, upload-time = "2025-11-26T00:28:24.34Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fc/6a/1000cad1700ab0af4d1b1d0a9c23c34badddb4f547c008bde2a6c61968f1/ome_types-0.6.3-py3-none-any.whl", hash = "sha256:ce9753ff351bbc534ee5c5038d3cf60b1e4c13d69ad2e6b5a5b75de2a52521a5", size = 245802, upload-time = "2025-11-26T00:28:22.853Z" }, -] - [[package]] name = "omegaconf" version = "2.3.0" @@ -1589,19 +1596,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/41/585a168330ff063014880a80d744219dbf1dd7a1c706e75ab3425a987384/pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b", size = 10992722, upload-time = "2025-09-29T23:20:54.139Z" }, ] -[[package]] -name = "pandas-stubs" -version = "2.3.3.260113" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "numpy" }, - { name = "types-pytz" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/92/5d/be23854a73fda69f1dbdda7bc10fbd6f930bd1fa87aaec389f00c901c1e8/pandas_stubs-2.3.3.260113.tar.gz", hash = "sha256:076e3724bcaa73de78932b012ec64b3010463d377fa63116f4e6850643d93800", size = 116131, upload-time = "2026-01-13T22:30:16.704Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/c6/df1fe324248424f77b89371116dab5243db7f052c32cc9fe7442ad9c5f75/pandas_stubs-2.3.3.260113-py3-none-any.whl", hash = "sha256:ec070b5c576e1badf12544ae50385872f0631fc35d99d00dc598c2954ec564d3", size = 168246, upload-time = "2026-01-13T22:30:15.244Z" }, -] - [[package]] name = "parso" version = "0.8.5" @@ -1834,19 +1828,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, ] -[[package]] -name = "pydantic-extra-types" -version = "2.11.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "pydantic" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/fd/35/2fee58b1316a73e025728583d3b1447218a97e621933fc776fb8c0f2ebdd/pydantic_extra_types-2.11.0.tar.gz", hash = "sha256:4e9991959d045b75feb775683437a97991d02c138e00b59176571db9ce634f0e", size = 157226, upload-time = "2025-12-31T16:18:27.944Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/fe/17/fabd56da47096d240dd45ba627bead0333b0cf0ee8ada9bec579287dadf3/pydantic_extra_types-2.11.0-py3-none-any.whl", hash = "sha256:84b864d250a0fc62535b7ec591e36f2c5b4d1325fa0017eb8cda9aeb63b374a6", size = 74296, upload-time = "2025-12-31T16:18:26.38Z" }, -] - [[package]] name = "pygments" version = "2.19.2" @@ -2002,6 +1983,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/c2/c012beae5f76b72f007a9e91ee9401cb88c51d0f83c6257a03e785c81cc2/pyzmq-27.1.0-cp312-abi3-win_arm64.whl", hash = "sha256:75a2f36223f0d535a0c919e23615fc85a1e23b71f40c7eb43d7b1dedb4d8f15f", size = 552993, upload-time = "2025-09-08T23:08:18.926Z" }, ] +[[package]] +name = "rasterio" +version = "1.5.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "affine" }, + { name = "attrs" }, + { name = "certifi" }, + { name = "click" }, + { name = "cligj" }, + { name = "numpy" }, + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f6/88/edb4b66b6cb2c13f123af5a3896bf70c0cbe73ab3cd4243cb4eb0212a0f6/rasterio-1.5.0.tar.gz", hash = "sha256:1e0ea56b02eea4989b36edf8e58a5a3ef40e1b7edcb04def2603accd5ab3ee7b", size = 452184, upload-time = "2026-01-05T16:06:47.169Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ac/de/ba1cd11d7d1182bfb26e758bf07016d04e5442f4f5fea35b0d7279b72399/rasterio-1.5.0-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:420656074897a460f5ef46f657b3061d2e004f9d99e613914b0671643e69d92c", size = 22787192, upload-time = "2026-01-05T16:05:19.779Z" }, + { url = "https://files.pythonhosted.org/packages/e6/42/efaeb6dc531dbcd02fec01c791a853bb5a139a5126ecec579ac0f735eeb9/rasterio-1.5.0-cp312-cp312-macosx_15_0_x86_64.whl", hash = "sha256:c5c3597a783857e760550e8f26365d928b0377ac5ffc3e12ba447ac65ca5406d", size = 24412221, upload-time = "2026-01-05T16:05:22.526Z" }, + { url = "https://files.pythonhosted.org/packages/a2/14/89645988424c40cbcb8334f94305ffe094dd28d85c643341d9690704c9f0/rasterio-1.5.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e14d07a09833b6df6024ce7a57aee1e1977b3aec682e30b1e58ce773462f2382", size = 36128020, upload-time = "2026-01-05T16:05:25.556Z" }, + { url = "https://files.pythonhosted.org/packages/85/23/5a52319a98451ff910f42e5f7f4804bfb39f9327933a89daab685d1ce2dd/rasterio-1.5.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:26dbcffcf0d01fc121cbb92186bc1cb78e16efe62b17be45ad7494446b325cf8", size = 37634010, upload-time = "2026-01-05T16:05:28.673Z" }, + { url = "https://files.pythonhosted.org/packages/57/d6/fe8826f813c98b046d8d4c3bc83053c89c71f367f89257d211fe5dd0b0ba/rasterio-1.5.0-cp312-cp312-win_amd64.whl", hash = "sha256:ac8d04eee66ca8060763ead607800e5611d857dd005905d920365e24a16ba20a", size = 30142328, upload-time = "2026-01-05T16:05:31.357Z" }, + { url = "https://files.pythonhosted.org/packages/af/62/6397379271d5628ed65ef781bf2d3a8f56094a86e6d8479c6ca506a1b960/rasterio-1.5.0-cp312-cp312-win_arm64.whl", hash = "sha256:31f1edc45c781ebd087e60cc00a4fc37028dd3fe25cff4098e4139fc9d0565be", size = 28500710, upload-time = "2026-01-05T16:05:33.906Z" }, +] + [[package]] name = "rationai-kube-jobs" version = "0.4.0" @@ -2069,19 +2073,18 @@ dependencies = [ [[package]] name = "ratiopath" -version = "1.0.3" +version = "1.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "albumentations" }, { name = "geopandas" }, { name = "imagecodecs" }, { name = "numpy" }, - { name = "ome-types" }, { name = "openslide-python" }, { name = "pandas" }, - { name = "pandas-stubs" }, { name = "pillow" }, { name = "pyvips" }, + { name = "rasterio" }, { name = "ray", extra = ["data"] }, { name = "scikit-image" }, { name = "shapely" }, @@ -2089,9 +2092,9 @@ dependencies = [ { name = "torch" }, { name = "zarr" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/71/71/84b7a927eb9ea410e793011e14257b291a0f8917bd09ff03f46475bf763d/ratiopath-1.0.3.tar.gz", hash = "sha256:d70d6fa8a387422e04e7b1535c2131830f9573cd6d58bd4b3167c3d008637e87", size = 21706, upload-time = "2025-09-29T20:23:52.047Z" } +sdist = { url = "https://files.pythonhosted.org/packages/b5/f0/43e2dbec23419c7ced4cc26fcfa1b2f34140833ac493ab99c5eecdd62611/ratiopath-1.0.4.tar.gz", hash = "sha256:8a7ab4118d1a23aba23e856bdc463a95d27546acf4a6b509c65d3ff437933dbc", size = 24303, upload-time = "2026-01-27T21:10:45.644Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3b/f8/802c5f470f87afca0036f36c77067cde560c1225ca04df493ad3722ceee1/ratiopath-1.0.3-py3-none-any.whl", hash = "sha256:cd2a217c261421820b3385c70fa2bdb5b9c9635bff0e5fe7655169cba7ac198d", size = 28040, upload-time = "2025-09-29T20:23:50.957Z" }, + { url = "https://files.pythonhosted.org/packages/83/33/5db8e9cd0a16774017317b4e1a53c0bce7f76c7b6f94f81ea05e19ed6431/ratiopath-1.0.4-py3-none-any.whl", hash = "sha256:cc0d971531af2999b4325e426434d2becf1d30334d3bdcb8af032826f7c29ad3", size = 30860, upload-time = "2026-01-27T21:10:44.233Z" }, ] [[package]] @@ -2583,15 +2586,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, ] -[[package]] -name = "types-pytz" -version = "2025.2.0.20251108" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/40/ff/c047ddc68c803b46470a357454ef76f4acd8c1088f5cc4891cdd909bfcf6/types_pytz-2025.2.0.20251108.tar.gz", hash = "sha256:fca87917836ae843f07129567b74c1929f1870610681b4c92cb86a3df5817bdb", size = 10961, upload-time = "2025-11-08T02:55:57.001Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/c1/56ef16bf5dcd255155cc736d276efa6ae0a5c26fd685e28f0412a4013c01/types_pytz-2025.2.0.20251108-py3-none-any.whl", hash = "sha256:0f1c9792cab4eb0e46c52f8845c8f77cf1e313cb3d68bf826aa867fe4717d91c", size = 10116, upload-time = "2025-11-08T02:55:56.194Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0" @@ -2667,7 +2661,7 @@ requires-dist = [ { name = "rationai-mlkit", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }, { name = "rationai-sdk", git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" }, { name = "rationai-tiling", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" }, - { name = "ratiopath", specifier = ">=1.0.3" }, + { name = "ratiopath", specifier = ">=1.0.4" }, { name = "ray", specifier = ">=2.52.1" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, @@ -2742,18 +2736,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ad/e4/8d97cca767bcc1be76d16fb76951608305561c6e056811587f36cb1316a8/werkzeug-3.1.5-py3-none-any.whl", hash = "sha256:5111e36e91086ece91f93268bb39b4a35c1e6f1feac762c9c822ded0a4e322dc", size = 225025, upload-time = "2026-01-08T17:49:21.859Z" }, ] -[[package]] -name = "xsdata" -version = "26.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/24/9f/c1b0fa54f2b3f43989015668daddd8bd50d3bf3461ba8b79fdeb8000b27d/xsdata-26.1.tar.gz", hash = "sha256:dcae2c0e5f329f1b4e09a2d148c96941c9556616bb5e3418970a63c5eb2cd831", size = 348523, upload-time = "2026-01-19T19:03:54.347Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/7b/48/37775c15adba3070f0af934a6617696a608a75f2e01be37a454e298cf32c/xsdata-26.1-py3-none-any.whl", hash = "sha256:aa02adf1b75668e7b685f4de3c4507e4aa31af3f2e2210fd45aa2f0e4a637a99", size = 235422, upload-time = "2026-01-19T19:03:52.684Z" }, -] - [[package]] name = "yarl" version = "1.22.0" From 3ab6d5a55e9ccfb87deb544f0d95c972bb676d97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 27 Jan 2026 21:53:21 +0000 Subject: [PATCH 16/42] fix: use tile overlay overlap as udfexpr --- preprocessing/tiling.py | 82 ++++++++++++++++++++++++++++++----------- 1 file changed, 61 insertions(+), 21 deletions(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 39db64c..3764a5a 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -14,6 +14,7 @@ from ratiopath.ray import read_slides from ratiopath.tiling import grid_tiles, tile_overlay_overlap from ratiopath.tiling.utils import row_hash +from ray.data.expressions import col from shapely import Polygon from shapely.geometry import box from sklearn.model_selection import train_test_split @@ -97,6 +98,16 @@ def qc_agg(row: dict[str, Any], df: pd.DataFrame) -> dict[str, Any]: return row +def add_mask_paths( + row: dict[str, Any], qc_folder: Path, tissue_folder: Path +) -> dict[str, Any]: + stem = Path(row["path"]).stem + row["tissue_mask_path"] = str(tissue_folder / f"{stem}.tiff") + for key, subfolder in QC_SUBFOLDERS.items(): + row[f"{key}_mask_path"] = str(qc_folder / subfolder / f"{stem}.tiff") + return row + + def create_tissue_roi(tile_extent: int) -> Polygon: offset = tile_extent // 4 size = tile_extent // 2 @@ -119,6 +130,9 @@ def tile(row: dict[str, Any]) -> list[dict[str, Any]]: "tile_extent_y": row["tile_extent_y"], "mpp_x": row["mpp_x"], "mpp_y": row["mpp_y"], + "tissue_mask_path": row["tissue_mask_path"], + "blur_mask_path": row["blur_mask_path"], + "artifacts_mask_path": row["artifacts_mask_path"], } for x, y in grid_tiles( slide_extent=(row["extent_x"], row["extent_y"]), @@ -128,13 +142,10 @@ def tile(row: dict[str, Any]) -> list[dict[str, Any]]: ] -def tissue(row: dict[str, Any], tissue_folder: Path, roi: Polygon) -> dict[str, Any]: - tissue_file = tissue_folder / Path(row["path"]).with_suffix(".tiff").name - overlap = tile_overlay_overlap( - roi, tissue_file, row["tile_x"], row["tile_y"], row["mpp_x"], row["mpp_y"] - ) # type: ignore[reportCallIssue] - - row["tissue"] = 1.0 - overlap.get("0", 0) +def extract_coverages(row: dict[str, Any], *cols) -> dict[str, Any]: + for c in cols: + overlap = row[f"{c}_overlap"] + row[c] = 1.0 - overlap.get("0", 0) return row @@ -142,17 +153,6 @@ def filter_tissue(row: dict[str, Any], threshold: float) -> bool: return row["tissue"] >= threshold -def qc(row: dict[str, Any], qc_folder: Path, roi: Polygon) -> dict[str, Any]: - for qc_key, subfolder in QC_SUBFOLDERS.items(): - qc_file = qc_folder / subfolder / Path(row["path"]).with_suffix(".tiff").name - overlap = tile_overlay_overlap( - roi, qc_file, row["tile_x"], row["tile_y"], row["mpp_x"], row["mpp_y"] - ) # type: ignore[reportCallIssue] - row[qc_key] = 1.0 - overlap.get("0", 0) - - return row - - def select(row: dict[str, Any]) -> dict[str, Any]: return { "slide_id": row["slide_id"], @@ -174,8 +174,8 @@ def tiling( tissue_threshold: float, ) -> tuple[pd.DataFrame, pd.DataFrame]: qc_folder = Path(mlflow.artifacts.download_artifacts(qc_mask_uri)) - qc_df = pd.read_csv(qc_folder / "qc_metrics.csv", index_col="slide_name") tissue_folder = Path(mlflow.artifacts.download_artifacts(tissue_mask_uri)) + qc_df = pd.read_csv(qc_folder / "qc_metrics.csv", index_col="slide_name") paths = df["path"].tolist() slides = ( @@ -183,6 +183,7 @@ def tiling( .map(row_hash, **LO_CPU, **LO_MEM) .map(nancy, fn_args=(df,), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] .map(qc_agg, fn_args=(qc_df,), **HI_CPU, **LO_MEM) # type: ignore[reportArgumentType] + .map(add_mask_paths, fn_args=(qc_folder, tissue_folder), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] ) tissue_roi = create_tissue_roi(tile_extent) @@ -191,9 +192,48 @@ def tiling( tiles = ( slides.flat_map(tile, **HI_CPU, **LO_MEM) .repartition(target_num_rows_per_block=128) - .map(tissue, fn_args=(tissue_folder, tissue_roi), **HI_CPU, **HI_MEM) # type: ignore[reportArgumentType] + .with_column( + "tissue_overlap", + tile_overlay_overlap( + tissue_roi, + col("tissue_mask_path"), + col("tile_x"), + col("tile_y"), + col("mpp_x"), + col("mpp_y"), + ), # type: ignore[reportCallIssue] + **HI_CPU, + **HI_MEM, + ) + .map(extract_coverages, fn_args=("tissue",), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] .filter(filter_tissue, fn_args=(tissue_threshold,), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] - .map(qc, fn_args=(qc_folder, qc_roi), **HI_CPU, **HI_MEM) # type: ignore[reportArgumentType] + .with_column( + "blur_overlap", + tile_overlay_overlap( + qc_roi, + col("blur_mask_path"), + col("tile_x"), + col("tile_y"), + col("mpp_x"), + col("mpp_y"), + ), # type: ignore[reportCallIssue] + **HI_CPU, + **HI_MEM, + ) + .with_column( + "artifacts_overlap", + tile_overlay_overlap( + qc_roi, + col("artifacts_mask_path"), + col("tile_x"), + col("tile_y"), + col("mpp_x"), + col("mpp_y"), + ), # type: ignore[reportCallIssue] + **HI_CPU, + **HI_MEM, + ) + .map(extract_coverages, fn_args=("blur", "artifacts"), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] .map(select, **LO_CPU, **LO_MEM) ) From 06e957e9991de30f0f94b36c72279910ee5655d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 12:16:45 +0000 Subject: [PATCH 17/42] chore: ratiopath from github --- pyproject.toml | 3 ++- uv.lock | 8 ++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6933a1f..4477ed5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "ray>=2.52.1", "torch>=2.9.0", "torchmetrics>=1.8.2", - "ratiopath>=1.0.4", + "ratiopath", ] [dependency-groups] @@ -33,3 +33,4 @@ rationai-masks = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/ rationai-tiling = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" } rationai-kube-jobs = { git = "ssh://git@gitlab.ics.muni.cz/rationai/infrastructure/kube-jobs" } rationai-sdk = { git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" } +ratiopath = { git = "https://github.com/RationAI/ratiopath.git", branch = "fix/pyarrow-arange" } diff --git a/uv.lock b/uv.lock index f61e800..fac3551 100644 --- a/uv.lock +++ b/uv.lock @@ -2074,7 +2074,7 @@ dependencies = [ [[package]] name = "ratiopath" version = "1.0.4" -source = { registry = "https://pypi.org/simple" } +source = { git = "https://github.com/RationAI/ratiopath.git?branch=fix%2Fpyarrow-arange#11f4506e482f3fc9a9969f0cd39fb748118cdd0b" } dependencies = [ { name = "albumentations" }, { name = "geopandas" }, @@ -2092,10 +2092,6 @@ dependencies = [ { name = "torch" }, { name = "zarr" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/b5/f0/43e2dbec23419c7ced4cc26fcfa1b2f34140833ac493ab99c5eecdd62611/ratiopath-1.0.4.tar.gz", hash = "sha256:8a7ab4118d1a23aba23e856bdc463a95d27546acf4a6b509c65d3ff437933dbc", size = 24303, upload-time = "2026-01-27T21:10:45.644Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/83/33/5db8e9cd0a16774017317b4e1a53c0bce7f76c7b6f94f81ea05e19ed6431/ratiopath-1.0.4-py3-none-any.whl", hash = "sha256:cc0d971531af2999b4325e426434d2becf1d30334d3bdcb8af032826f7c29ad3", size = 30860, upload-time = "2026-01-27T21:10:44.233Z" }, -] [[package]] name = "ray" @@ -2661,7 +2657,7 @@ requires-dist = [ { name = "rationai-mlkit", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }, { name = "rationai-sdk", git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" }, { name = "rationai-tiling", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" }, - { name = "ratiopath", specifier = ">=1.0.4" }, + { name = "ratiopath", git = "https://github.com/RationAI/ratiopath.git?branch=fix%2Fpyarrow-arange" }, { name = "ray", specifier = ">=2.52.1" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, From de0fd753cf0f89dd7950760ecd6708236c14336b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 15:00:02 +0000 Subject: [PATCH 18/42] fix: WIP --- preprocessing/tiling.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 3764a5a..6aafe3d 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -145,7 +145,11 @@ def tile(row: dict[str, Any]) -> list[dict[str, Any]]: def extract_coverages(row: dict[str, Any], *cols) -> dict[str, Any]: for c in cols: overlap = row[f"{c}_overlap"] - row[c] = 1.0 - overlap.get("0", 0) + try: + row[c] = 1.0 - overlap.get("0", 0) + except TypeError as e: + # Raise same error but with overlap info for easier debugging + raise TypeError(f"Invalid overlap data: {overlap}") from e return row From e56216b813772b7ec65b1c6f27b92cc48b603673 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 15:37:33 +0000 Subject: [PATCH 19/42] fix: None in overlap --- preprocessing/tiling.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 6aafe3d..0b9efb9 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -145,11 +145,11 @@ def tile(row: dict[str, Any]) -> list[dict[str, Any]]: def extract_coverages(row: dict[str, Any], *cols) -> dict[str, Any]: for c in cols: overlap = row[f"{c}_overlap"] - try: - row[c] = 1.0 - overlap.get("0", 0) - except TypeError as e: - # Raise same error but with overlap info for easier debugging - raise TypeError(f"Invalid overlap data: {overlap}") from e + zero_overlap = overlap.get("0", 0) + if zero_overlap is None: + row[c] = 1.0 + else: + row[c] = 1.0 - zero_overlap return row From 95cc81cf87833fbe37482270443931b167534610 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 17:34:07 +0000 Subject: [PATCH 20/42] feat: confs --- configs/data/processed_w_masks/ftn.yaml | 7 +++++++ configs/data/processed_w_masks/ikem.yaml | 7 +++++++ configs/data/processed_w_masks/knl_patos.yaml | 7 +++++++ .../preprocessing/tiling/ftn_0_320px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/ftn_0_75um.yaml | 14 ++++++++++++++ .../preprocessing/tiling/ftn_1_224px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/ftn_2_224px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/ikem_0_320px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/ikem_0_75um.yaml | 14 ++++++++++++++ .../preprocessing/tiling/ikem_1_224px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/ikem_2_224px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/knl_patos_0_320px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/knl_patos_0_75um.yaml | 14 ++++++++++++++ .../preprocessing/tiling/knl_patos_1_224px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/knl_patos_224px.yaml | 17 ----------------- .../preprocessing/tiling/knl_patos_2_224px.yaml | 14 ++++++++++++++ .../preprocessing/tiling/knl_patos_320px.yaml | 17 ----------------- .../preprocessing/tiling/knl_patos_75um.yaml | 17 ----------------- preprocessing/tiling.py | 4 ++-- 19 files changed, 191 insertions(+), 53 deletions(-) create mode 100644 configs/data/processed_w_masks/ftn.yaml create mode 100644 configs/data/processed_w_masks/ikem.yaml create mode 100644 configs/data/processed_w_masks/knl_patos.yaml create mode 100644 configs/experiment/preprocessing/tiling/ftn_0_320px.yaml create mode 100644 configs/experiment/preprocessing/tiling/ftn_0_75um.yaml create mode 100644 configs/experiment/preprocessing/tiling/ftn_1_224px.yaml create mode 100644 configs/experiment/preprocessing/tiling/ftn_2_224px.yaml create mode 100644 configs/experiment/preprocessing/tiling/ikem_0_320px.yaml create mode 100644 configs/experiment/preprocessing/tiling/ikem_0_75um.yaml create mode 100644 configs/experiment/preprocessing/tiling/ikem_1_224px.yaml create mode 100644 configs/experiment/preprocessing/tiling/ikem_2_224px.yaml create mode 100644 configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml create mode 100644 configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml create mode 100644 configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml delete mode 100644 configs/experiment/preprocessing/tiling/knl_patos_224px.yaml create mode 100644 configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml delete mode 100644 configs/experiment/preprocessing/tiling/knl_patos_320px.yaml delete mode 100644 configs/experiment/preprocessing/tiling/knl_patos_75um.yaml diff --git a/configs/data/processed_w_masks/ftn.yaml b/configs/data/processed_w_masks/ftn.yaml new file mode 100644 index 0000000..519dc93 --- /dev/null +++ b/configs/data/processed_w_masks/ftn.yaml @@ -0,0 +1,7 @@ +# @package _global_ + +dataset: + institution: ftn + uri: mlflow-artifacts:/86/142642fc780f4a96800c691168b5c2c3/artifacts/dataset.csv # TODO update URI + tissue_mask_uri: mlflow-artifacts:/86/04778b10de254572b69ce0a101c1eee4/artifacts/tissue_masks # TODO update URI + qc_mask_uri: ??? \ No newline at end of file diff --git a/configs/data/processed_w_masks/ikem.yaml b/configs/data/processed_w_masks/ikem.yaml new file mode 100644 index 0000000..c7c6c59 --- /dev/null +++ b/configs/data/processed_w_masks/ikem.yaml @@ -0,0 +1,7 @@ +# @package _global_ + +dataset: + institution: ikem + uri: mlflow-artifacts:/86/52d0081d60ba4585a16a3bd341d5ab09/artifacts/dataset.csv # TODO update URI + tissue_mask_uri: mlflow-artifacts:/86/13359cdd5d1a47ddabc352b9aa0d7635/artifacts/tissue_masks # TODO update URI + qc_mask_uri: ??? \ No newline at end of file diff --git a/configs/data/processed_w_masks/knl_patos.yaml b/configs/data/processed_w_masks/knl_patos.yaml new file mode 100644 index 0000000..67bf0d8 --- /dev/null +++ b/configs/data/processed_w_masks/knl_patos.yaml @@ -0,0 +1,7 @@ +# @package _global_ + +dataset: + institution: knl_patos + uri: mlflow-artifacts:/86/afbcfd43cb3c4fd0b1e9b5dbe7327d91/artifacts/dataset.csv # TODO update URI + tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks # TODO update URI + qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts # TODO update URI \ No newline at end of file diff --git a/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml b/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml new file mode 100644 index 0000000..05dc26e --- /dev/null +++ b/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/ftn + - _self_ + +mpp: 0.17 # level 0 +tile_extent: 320 +stride: 160 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml b/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml new file mode 100644 index 0000000..7e9e588 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/ftn + - _self_ + +mpp: 0.17 # level 0 +tile_extent: 430 # 75 / 0.17 ≈ 430 +stride: 215 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml b/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml new file mode 100644 index 0000000..24c31a7 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/ftn + - _self_ + +mpp: 0.52 # level 1 +tile_extent: 224 +stride: 112 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml b/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml new file mode 100644 index 0000000..6a33208 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/ftn + - _self_ + +mpp: 1.55 # level 2 +tile_extent: 224 +stride: 112 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml b/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml new file mode 100644 index 0000000..c4bb4c8 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/ikem + - _self_ + +mpp: 0.17 # level 0 +tile_extent: 320 +stride: 160 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml b/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml new file mode 100644 index 0000000..04326de --- /dev/null +++ b/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/ikem + - _self_ + +mpp: 0.17 # level 0 +tile_extent: 430 # 75 / 0.17 ≈ 430 +stride: 215 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml b/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml new file mode 100644 index 0000000..bc7905c --- /dev/null +++ b/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/ikem + - _self_ + +mpp: 0.52 # level 1 +tile_extent: 224 +stride: 112 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml b/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml new file mode 100644 index 0000000..0837045 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/ikem + - _self_ + +mpp: 1.55 # level 2 +tile_extent: 224 +stride: 112 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml new file mode 100644 index 0000000..ee411e2 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/knl_patos + - _self_ + +mpp: 0.17 # level 0 +tile_extent: 320 +stride: 160 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml b/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml new file mode 100644 index 0000000..1daf190 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/knl_patos + - _self_ + +mpp: 0.17 # level 0 +tile_extent: 430 # 75 / 0.17 ≈ 430 +stride: 215 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml new file mode 100644 index 0000000..51992fd --- /dev/null +++ b/configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/knl_patos + - _self_ + +mpp: 0.52 # level 1 +tile_extent: 224 +stride: 112 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml deleted file mode 100644 index e5f203f..0000000 --- a/configs/experiment/preprocessing/tiling/knl_patos_224px.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# @package _global_ - -defaults: - - /data/processed/knl_patos - - _self_ - -mpp: 1.55 # level 2 -tile_extent: 224 -stride: 112 - -tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks -qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts - -splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml new file mode 100644 index 0000000..fec9d84 --- /dev/null +++ b/configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml @@ -0,0 +1,14 @@ +# @package _global_ + +defaults: + - /data/processed_w_masks/knl_patos + - _self_ + +mpp: 1.55 # level 2 +tile_extent: 224 +stride: 112 + +splits: + train: 0.0 + test_preliminary: 0.5 + test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml deleted file mode 100644 index e535f72..0000000 --- a/configs/experiment/preprocessing/tiling/knl_patos_320px.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# @package _global_ - -defaults: - - /data/processed/knl_patos - - _self_ - -mpp: 0.17 # level 0 -tile_extent: 320 -stride: 160 - -tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks -qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts - -splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml b/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml deleted file mode 100644 index 0c59072..0000000 --- a/configs/experiment/preprocessing/tiling/knl_patos_75um.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# @package _global_ - -defaults: - - /data/processed/knl_patos - - _self_ - -mpp: 0.17 # level 0 -tile_extent: 430 # 75 / 0.17 ≈ 430 -stride: 215 - -tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks -qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts - -splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 0b9efb9..4880286 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -262,8 +262,8 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: df_slides, df_tiles = tiling( df, - qc_mask_uri=config.qc_mask_uri, - tissue_mask_uri=config.tissue_mask_uri, + qc_mask_uri=config.dataset.qc_mask_uri, + tissue_mask_uri=config.dataset.tissue_mask_uri, tile_extent=config.tile_extent, stride=config.stride, mpp=config.mpp, From 754698f339060f308c7cf0730715ca767b28ea12 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 18:13:54 +0000 Subject: [PATCH 21/42] chore: dependencies --- pyproject.toml | 3 +-- uv.lock | 10 +++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4477ed5..6933a1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "ray>=2.52.1", "torch>=2.9.0", "torchmetrics>=1.8.2", - "ratiopath", + "ratiopath>=1.0.4", ] [dependency-groups] @@ -33,4 +33,3 @@ rationai-masks = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/ rationai-tiling = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" } rationai-kube-jobs = { git = "ssh://git@gitlab.ics.muni.cz/rationai/infrastructure/kube-jobs" } rationai-sdk = { git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" } -ratiopath = { git = "https://github.com/RationAI/ratiopath.git", branch = "fix/pyarrow-arange" } diff --git a/uv.lock b/uv.lock index fac3551..352dc6b 100644 --- a/uv.lock +++ b/uv.lock @@ -2073,8 +2073,8 @@ dependencies = [ [[package]] name = "ratiopath" -version = "1.0.4" -source = { git = "https://github.com/RationAI/ratiopath.git?branch=fix%2Fpyarrow-arange#11f4506e482f3fc9a9969f0cd39fb748118cdd0b" } +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "albumentations" }, { name = "geopandas" }, @@ -2092,6 +2092,10 @@ dependencies = [ { name = "torch" }, { name = "zarr" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/7f/ce/93e454d7a0c5928be065b0e31771162d2c8e1cdf736b3049a4aa9da526db/ratiopath-1.1.0.tar.gz", hash = "sha256:a7af55867f132422b83b37ba8d0ad2b3fbac7bb2031e107c8a468533ff8cc20c", size = 24276, upload-time = "2026-01-28T17:59:37.021Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/49/195ecc09b87347dbd4cdb42b2152ccb8369975baf53b65d7b577f81a7e23/ratiopath-1.1.0-py3-none-any.whl", hash = "sha256:5db5845dd2a8b23fe94501edc97aa812b94813b658e28d14b528f097bd1cc267", size = 30858, upload-time = "2026-01-28T17:59:35.906Z" }, +] [[package]] name = "ray" @@ -2657,7 +2661,7 @@ requires-dist = [ { name = "rationai-mlkit", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }, { name = "rationai-sdk", git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" }, { name = "rationai-tiling", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" }, - { name = "ratiopath", git = "https://github.com/RationAI/ratiopath.git?branch=fix%2Fpyarrow-arange" }, + { name = "ratiopath", specifier = ">=1.0.4" }, { name = "ray", specifier = ">=2.52.1" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, From 178f29412bdc50cd90d05d53b22aaf6470df3376 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 16:57:31 +0000 Subject: [PATCH 22/42] feat: tile = stride --- configs/experiment/preprocessing/tiling/ftn_0_320px.yaml | 2 +- configs/experiment/preprocessing/tiling/ftn_0_75um.yaml | 2 +- configs/experiment/preprocessing/tiling/ikem_0_320px.yaml | 2 +- configs/experiment/preprocessing/tiling/ikem_0_75um.yaml | 2 +- configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml | 2 +- configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml b/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml index 05dc26e..4aca578 100644 --- a/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml @@ -6,7 +6,7 @@ defaults: mpp: 0.17 # level 0 tile_extent: 320 -stride: 160 +stride: 320 splits: train: 0.0 diff --git a/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml b/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml index 7e9e588..5d42aab 100644 --- a/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml @@ -6,7 +6,7 @@ defaults: mpp: 0.17 # level 0 tile_extent: 430 # 75 / 0.17 ≈ 430 -stride: 215 +stride: 430 splits: train: 0.0 diff --git a/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml b/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml index c4bb4c8..076a282 100644 --- a/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml @@ -6,7 +6,7 @@ defaults: mpp: 0.17 # level 0 tile_extent: 320 -stride: 160 +stride: 320 splits: train: 0.0 diff --git a/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml b/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml index 04326de..a4bfc6d 100644 --- a/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml @@ -6,7 +6,7 @@ defaults: mpp: 0.17 # level 0 tile_extent: 430 # 75 / 0.17 ≈ 430 -stride: 215 +stride: 430 splits: train: 0.0 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml index ee411e2..60764af 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml @@ -6,7 +6,7 @@ defaults: mpp: 0.17 # level 0 tile_extent: 320 -stride: 160 +stride: 320 splits: train: 0.0 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml b/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml index 1daf190..2d84218 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml @@ -6,7 +6,7 @@ defaults: mpp: 0.17 # level 0 tile_extent: 430 # 75 / 0.17 ≈ 430 -stride: 215 +stride: 430 splits: train: 0.0 From e4d2499dbadc3cf10565ff7e5481d8f1242c14b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 19:50:25 +0000 Subject: [PATCH 23/42] fix: typo --- preprocessing/quality_control.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocessing/quality_control.py b/preprocessing/quality_control.py index bce3912..191cabf 100644 --- a/preprocessing/quality_control.py +++ b/preprocessing/quality_control.py @@ -26,11 +26,11 @@ class QCParameters(TypedDict): def get_qc_masks(qc_parameters: QCParameters) -> Generator[tuple[str, str], None, None]: - if qc_parameters["check_residual"]: + if qc_parameters["check_focus"]: yield ("Piqe_focus_score_piqe_median", "blur_per_tile") yield ("Piqe_piqe_median_activity_mask", "blur_per_pixel") - if qc_parameters["check_focus"]: + if qc_parameters["check_residual"]: yield ("ResidualArtifactsAndCoverage_cov_percent_heatmap", "artifacts_per_tile") yield ("ResidualArtifactsAndCoverage_coverage_mask", "artifacts_per_pixel") From acfbf19f02d4fb5ce64b3370c357f41bc574a797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 20:21:16 +0000 Subject: [PATCH 24/42] fix: glob over changing dir --- preprocessing/quality_control.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/preprocessing/quality_control.py b/preprocessing/quality_control.py index 191cabf..caf1fa7 100644 --- a/preprocessing/quality_control.py +++ b/preprocessing/quality_control.py @@ -42,7 +42,8 @@ def organize_masks(output_path: Path, subdir: str, mask_prefix: str) -> None: prefix_dir = output_path / subdir prefix_dir.mkdir(parents=True, exist_ok=True) - for file in output_path.glob(f"{mask_prefix}_*.tiff"): + # Glob has to be wrapped in list, because we're modifying the directory!!! + for file in list(output_path.glob(f"{mask_prefix}_*.tiff")): slide_name = file.name.replace(f"{mask_prefix}_", "") destination = prefix_dir / slide_name file.rename(destination) From 659f505b7e3bf57cff1b120104d43637825ac528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 20:21:45 +0000 Subject: [PATCH 25/42] fix: finish the run --- preprocessing/quality_control.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/preprocessing/quality_control.py b/preprocessing/quality_control.py index caf1fa7..db870db 100644 --- a/preprocessing/quality_control.py +++ b/preprocessing/quality_control.py @@ -58,21 +58,21 @@ async def qc_main( qc_parameters: QCParameters, ) -> None: async with rationai.AsyncClient() as client: # type: ignore[attr-defined] - async for result in tqdm( - client.qc.check_slides( - slides, - output_path, - config=SlideCheckConfig(**qc_parameters), - timeout=request_timeout, - max_concurrent=max_concurrent, - ), - total=len(slides), - ): - if not result.success: - with open(Path(output_path) / "qc_errors.log", "a") as log_file: - log_file.write( - f"Failed to process {result.wsi_path}: {result.error}\n" - ) + # async for result in tqdm( + # client.qc.check_slides( + # slides, + # output_path, + # config=SlideCheckConfig(**qc_parameters), + # timeout=request_timeout, + # max_concurrent=max_concurrent, + # ), + # total=len(slides), + # ): + # if not result.success: + # with open(Path(output_path) / "qc_errors.log", "a") as log_file: + # log_file.write( + # f"Failed to process {result.wsi_path}: {result.error}\n" + # ) # Organize generated masks into subdirectories for prefix, artifact_name in get_qc_masks(qc_parameters): From 45ac3b307adb99b0ce31df3afe57dea1a52625c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 20:34:33 +0000 Subject: [PATCH 26/42] fix: rever last commit --- preprocessing/quality_control.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/preprocessing/quality_control.py b/preprocessing/quality_control.py index db870db..caf1fa7 100644 --- a/preprocessing/quality_control.py +++ b/preprocessing/quality_control.py @@ -58,21 +58,21 @@ async def qc_main( qc_parameters: QCParameters, ) -> None: async with rationai.AsyncClient() as client: # type: ignore[attr-defined] - # async for result in tqdm( - # client.qc.check_slides( - # slides, - # output_path, - # config=SlideCheckConfig(**qc_parameters), - # timeout=request_timeout, - # max_concurrent=max_concurrent, - # ), - # total=len(slides), - # ): - # if not result.success: - # with open(Path(output_path) / "qc_errors.log", "a") as log_file: - # log_file.write( - # f"Failed to process {result.wsi_path}: {result.error}\n" - # ) + async for result in tqdm( + client.qc.check_slides( + slides, + output_path, + config=SlideCheckConfig(**qc_parameters), + timeout=request_timeout, + max_concurrent=max_concurrent, + ), + total=len(slides), + ): + if not result.success: + with open(Path(output_path) / "qc_errors.log", "a") as log_file: + log_file.write( + f"Failed to process {result.wsi_path}: {result.error}\n" + ) # Organize generated masks into subdirectories for prefix, artifact_name in get_qc_masks(qc_parameters): From 3c7a5fc3aef36b684bec8541b57cc655fa476455 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 21:50:52 +0000 Subject: [PATCH 27/42] feat: conf --- configs/data/processed_w_masks/ikem.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/data/processed_w_masks/ikem.yaml b/configs/data/processed_w_masks/ikem.yaml index c7c6c59..476e1a3 100644 --- a/configs/data/processed_w_masks/ikem.yaml +++ b/configs/data/processed_w_masks/ikem.yaml @@ -4,4 +4,4 @@ dataset: institution: ikem uri: mlflow-artifacts:/86/52d0081d60ba4585a16a3bd341d5ab09/artifacts/dataset.csv # TODO update URI tissue_mask_uri: mlflow-artifacts:/86/13359cdd5d1a47ddabc352b9aa0d7635/artifacts/tissue_masks # TODO update URI - qc_mask_uri: ??? \ No newline at end of file + qc_mask_uri: mlflow-artifacts:/86/98443fe2b67445d5a56598bff15b7f27/artifacts # TODO update URI \ No newline at end of file From 774f24a5d336dbb91414207c9ae58f4c598c8a7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 21:56:27 +0000 Subject: [PATCH 28/42] fix: splits --- configs/experiment/preprocessing/tiling/ftn_0_320px.yaml | 6 +++--- configs/experiment/preprocessing/tiling/ftn_0_75um.yaml | 6 +++--- configs/experiment/preprocessing/tiling/ftn_1_224px.yaml | 6 +++--- configs/experiment/preprocessing/tiling/ftn_2_224px.yaml | 6 +++--- configs/experiment/preprocessing/tiling/ikem_0_320px.yaml | 6 +++--- configs/experiment/preprocessing/tiling/ikem_0_75um.yaml | 6 +++--- configs/experiment/preprocessing/tiling/ikem_1_224px.yaml | 6 +++--- configs/experiment/preprocessing/tiling/ikem_2_224px.yaml | 6 +++--- 8 files changed, 24 insertions(+), 24 deletions(-) diff --git a/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml b/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml index 4aca578..ba2308a 100644 --- a/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml @@ -9,6 +9,6 @@ tile_extent: 320 stride: 320 splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 + train: 0.7 + test_preliminary: 0.15 + test_final: 0.15 \ No newline at end of file diff --git a/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml b/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml index 5d42aab..00c2764 100644 --- a/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml @@ -9,6 +9,6 @@ tile_extent: 430 # 75 / 0.17 ≈ 430 stride: 430 splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 + train: 0.7 + test_preliminary: 0.15 + test_final: 0.15 diff --git a/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml b/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml index 24c31a7..aa6ade0 100644 --- a/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml @@ -9,6 +9,6 @@ tile_extent: 224 stride: 112 splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 + train: 0.7 + test_preliminary: 0.15 + test_final: 0.15 \ No newline at end of file diff --git a/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml b/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml index 6a33208..3dbc290 100644 --- a/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml @@ -9,6 +9,6 @@ tile_extent: 224 stride: 112 splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 + train: 0.7 + test_preliminary: 0.15 + test_final: 0.15 diff --git a/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml b/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml index 076a282..36b74cb 100644 --- a/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml @@ -9,6 +9,6 @@ tile_extent: 320 stride: 320 splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 + train: 0.7 + test_preliminary: 0.15 + test_final: 0.15 \ No newline at end of file diff --git a/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml b/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml index a4bfc6d..d249001 100644 --- a/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml @@ -9,6 +9,6 @@ tile_extent: 430 # 75 / 0.17 ≈ 430 stride: 430 splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 + train: 0.7 + test_preliminary: 0.15 + test_final: 0.15 diff --git a/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml b/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml index bc7905c..49aa860 100644 --- a/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml @@ -9,6 +9,6 @@ tile_extent: 224 stride: 112 splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 + train: 0.7 + test_preliminary: 0.15 + test_final: 0.15 diff --git a/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml b/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml index 0837045..b0d0e36 100644 --- a/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml @@ -9,6 +9,6 @@ tile_extent: 224 stride: 112 splits: - train: 0.0 - test_preliminary: 0.5 - test_final: 0.5 + train: 0.7 + test_preliminary: 0.15 + test_final: 0.15 From 076ba934c44945a91681374f38ede4a16994e440 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 22:22:52 +0000 Subject: [PATCH 29/42] feat: tweaking resources --- preprocessing/tiling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 4880286..8324f6e 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -37,7 +37,7 @@ class _RayMemResources(TypedDict): LO_CPU: _RayCpuResources = {"num_cpus": 0.1} HI_CPU: _RayCpuResources = {"num_cpus": 0.2} LO_MEM: _RayMemResources = {"memory": 128 * 1024**2} -HI_MEM: _RayMemResources = {"memory": 1024**3} +HI_MEM: _RayMemResources = {"memory": 256 * 1024**2} def download_dataset(uri: str) -> pd.DataFrame: @@ -195,7 +195,7 @@ def tiling( tiles = ( slides.flat_map(tile, **HI_CPU, **LO_MEM) - .repartition(target_num_rows_per_block=128) + .repartition(target_num_rows_per_block=4096) .with_column( "tissue_overlap", tile_overlay_overlap( From 27886567d472213f0ceb0028c568faa0aee64b2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 23:26:06 +0000 Subject: [PATCH 30/42] feat: confs --- configs/data/processed_w_masks/ftn.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/data/processed_w_masks/ftn.yaml b/configs/data/processed_w_masks/ftn.yaml index 519dc93..57903cf 100644 --- a/configs/data/processed_w_masks/ftn.yaml +++ b/configs/data/processed_w_masks/ftn.yaml @@ -4,4 +4,4 @@ dataset: institution: ftn uri: mlflow-artifacts:/86/142642fc780f4a96800c691168b5c2c3/artifacts/dataset.csv # TODO update URI tissue_mask_uri: mlflow-artifacts:/86/04778b10de254572b69ce0a101c1eee4/artifacts/tissue_masks # TODO update URI - qc_mask_uri: ??? \ No newline at end of file + qc_mask_uri: mlflow-artifacts:/86/c8edfb2541e84b44b1a28be3540c1a35/artifacts # TODO update URI \ No newline at end of file From 25bec6e9e52ed4344b475a4d5e9990b8b5789d04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 15:17:40 +0000 Subject: [PATCH 31/42] chore: dependecies --- pyproject.toml | 3 ++- uv.lock | 10 +++------- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6933a1f..37038f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "ray>=2.52.1", "torch>=2.9.0", "torchmetrics>=1.8.2", - "ratiopath>=1.0.4", + "ratiopath", ] [dependency-groups] @@ -33,3 +33,4 @@ rationai-masks = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/ rationai-tiling = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" } rationai-kube-jobs = { git = "ssh://git@gitlab.ics.muni.cz/rationai/infrastructure/kube-jobs" } rationai-sdk = { git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" } +ratiopath = { git = "https://github.com/RationAI/ratiopath.git", branch = "fix/read-overlay" } diff --git a/uv.lock b/uv.lock index 352dc6b..b8f001d 100644 --- a/uv.lock +++ b/uv.lock @@ -2073,8 +2073,8 @@ dependencies = [ [[package]] name = "ratiopath" -version = "1.1.0" -source = { registry = "https://pypi.org/simple" } +version = "1.1.1" +source = { git = "https://github.com/RationAI/ratiopath.git?branch=fix%2Fread-overlay#8623fbda503e93a5b19770f089a7528d3e0e15eb" } dependencies = [ { name = "albumentations" }, { name = "geopandas" }, @@ -2092,10 +2092,6 @@ dependencies = [ { name = "torch" }, { name = "zarr" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7f/ce/93e454d7a0c5928be065b0e31771162d2c8e1cdf736b3049a4aa9da526db/ratiopath-1.1.0.tar.gz", hash = "sha256:a7af55867f132422b83b37ba8d0ad2b3fbac7bb2031e107c8a468533ff8cc20c", size = 24276, upload-time = "2026-01-28T17:59:37.021Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/06/49/195ecc09b87347dbd4cdb42b2152ccb8369975baf53b65d7b577f81a7e23/ratiopath-1.1.0-py3-none-any.whl", hash = "sha256:5db5845dd2a8b23fe94501edc97aa812b94813b658e28d14b528f097bd1cc267", size = 30858, upload-time = "2026-01-28T17:59:35.906Z" }, -] [[package]] name = "ray" @@ -2661,7 +2657,7 @@ requires-dist = [ { name = "rationai-mlkit", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }, { name = "rationai-sdk", git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" }, { name = "rationai-tiling", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" }, - { name = "ratiopath", specifier = ">=1.0.4" }, + { name = "ratiopath", git = "https://github.com/RationAI/ratiopath.git?branch=fix%2Fread-overlay" }, { name = "ray", specifier = ">=2.52.1" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, From 5faa224571115f3431a3e43a347b0e27efc9be94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 22:14:38 +0000 Subject: [PATCH 32/42] fix: add paths --- preprocessing/tiling.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 8324f6e..ca2b358 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -194,7 +194,13 @@ def tiling( qc_roi = create_qc_roi(tile_extent) tiles = ( - slides.flat_map(tile, **HI_CPU, **LO_MEM) + slides.map( + add_mask_paths, # type: ignore[reportArgumentType] + fn_args=(qc_folder, tissue_folder), + **LO_CPU, + **LO_MEM, + ) + .flat_map(tile, **HI_CPU, **LO_MEM) .repartition(target_num_rows_per_block=4096) .with_column( "tissue_overlap", From eb96f56aacfa512bd4ede9d70d5e6bf1a9eccc16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 5 Feb 2026 08:55:02 +0000 Subject: [PATCH 33/42] fix: conf --- configs/preprocessing/tiling.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/configs/preprocessing/tiling.yaml b/configs/preprocessing/tiling.yaml index 2ac485c..b6ddec3 100644 --- a/configs/preprocessing/tiling.yaml +++ b/configs/preprocessing/tiling.yaml @@ -3,10 +3,6 @@ mpp: ??? tile_extent: ??? stride: ??? - -tissue_mask_uri: ??? -qc_mask_uri: ??? - tissue_threshold: 0.5 splits: From 89a458233b2ff842f31462a6b6ca1933d3e9d7e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 5 Feb 2026 09:05:09 +0000 Subject: [PATCH 34/42] fix: typo --- preprocessing/tiling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index ca2b358..73490e2 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -187,7 +187,6 @@ def tiling( .map(row_hash, **LO_CPU, **LO_MEM) .map(nancy, fn_args=(df,), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] .map(qc_agg, fn_args=(qc_df,), **HI_CPU, **LO_MEM) # type: ignore[reportArgumentType] - .map(add_mask_paths, fn_args=(qc_folder, tissue_folder), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] ) tissue_roi = create_tissue_roi(tile_extent) From 18c556d5e3fe74e3c2511419d01c5ac45e9964d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 5 Feb 2026 09:21:32 +0000 Subject: [PATCH 35/42] chore: dependencies --- pyproject.toml | 3 +-- uv.lock | 8 ++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 37038f3..f275dc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ "ray>=2.52.1", "torch>=2.9.0", "torchmetrics>=1.8.2", - "ratiopath", + "ratiopath>=1.1.1", ] [dependency-groups] @@ -33,4 +33,3 @@ rationai-masks = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/ rationai-tiling = { git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" } rationai-kube-jobs = { git = "ssh://git@gitlab.ics.muni.cz/rationai/infrastructure/kube-jobs" } rationai-sdk = { git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" } -ratiopath = { git = "https://github.com/RationAI/ratiopath.git", branch = "fix/read-overlay" } diff --git a/uv.lock b/uv.lock index b8f001d..5f15f44 100644 --- a/uv.lock +++ b/uv.lock @@ -2074,7 +2074,7 @@ dependencies = [ [[package]] name = "ratiopath" version = "1.1.1" -source = { git = "https://github.com/RationAI/ratiopath.git?branch=fix%2Fread-overlay#8623fbda503e93a5b19770f089a7528d3e0e15eb" } +source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "albumentations" }, { name = "geopandas" }, @@ -2092,6 +2092,10 @@ dependencies = [ { name = "torch" }, { name = "zarr" }, ] +sdist = { url = "https://files.pythonhosted.org/packages/07/4c/fb30cb6689a42218498399c2d5deac9d657f71abc997de76843a83f0bb71/ratiopath-1.1.1.tar.gz", hash = "sha256:ed2434ad4db1281b1b5f6a1461efe28b2195e3728702c93f89a527b6a77227ad", size = 24469, upload-time = "2026-02-05T09:18:20.33Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/89/7c/b9859ad60675976b5a9df46da2253747f685530df66b9813dcf80449a445/ratiopath-1.1.1-py3-none-any.whl", hash = "sha256:b5971f3db693775501daada26a8e4afeb9b7ae9aa761c85930811317fb53344f", size = 31084, upload-time = "2026-02-05T09:18:18.889Z" }, +] [[package]] name = "ray" @@ -2657,7 +2661,7 @@ requires-dist = [ { name = "rationai-mlkit", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/mlkit.git" }, { name = "rationai-sdk", git = "https://gitlab.ics.muni.cz/rationai/infrastructure/rationai-sdk-python.git" }, { name = "rationai-tiling", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" }, - { name = "ratiopath", git = "https://github.com/RationAI/ratiopath.git?branch=fix%2Fread-overlay" }, + { name = "ratiopath", specifier = ">=1.1.1" }, { name = "ray", specifier = ">=2.52.1" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, From 35cfb5c061869db72dd66188541362094d8a7bc3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Fri, 6 Feb 2026 08:45:49 +0000 Subject: [PATCH 36/42] fix: group splitting --- preprocessing/tiling.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 73490e2..d2178d1 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -17,7 +17,7 @@ from ray.data.expressions import col from shapely import Polygon from shapely.geometry import box -from sklearn.model_selection import train_test_split +from sklearn.model_selection import GroupShuffleSplit ray.init(runtime_env={"excludes": [".git", ".venv"]}) @@ -46,6 +46,20 @@ def download_dataset(uri: str) -> pd.DataFrame: return df +def train_test_split_groups( + df: pd.DataFrame, + train_size: float | None = None, + test_size: float | None = None, + random_state: int | None = None, + groups: pd.Series | None = None, +) -> tuple[pd.DataFrame, pd.DataFrame]: + splitter = GroupShuffleSplit( + 1, train_size=train_size, test_size=test_size, random_state=random_state + ) + train_idx, test_idx = next(splitter.split(df, groups=groups)) + return df.iloc[train_idx], df.iloc[test_idx] + + def split_dataset( dataset: pd.DataFrame, splits: dict[str, float], random_state: int = 42 ) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: @@ -53,19 +67,14 @@ def split_dataset( splits["train"] + splits["test_preliminary"] + splits["test_final"], 1.0 ), "Splits must sum to 1.0" - train: pd.DataFrame - test: pd.DataFrame - test_preliminary: pd.DataFrame - test_final: pd.DataFrame - if splits["train"] == 0.0: train = pd.DataFrame(columns=dataset.columns) test = dataset else: - train, test = train_test_split( + train, test = train_test_split_groups( dataset, train_size=splits["train"], - stratify=dataset["nancy"], + groups=dataset["case_id"], random_state=random_state, ) @@ -74,10 +83,10 @@ def split_dataset( test_final = test else: preliminary_size = splits["test_preliminary"] / (1.0 - splits["train"]) - test_preliminary, test_final = train_test_split( + test_preliminary, test_final = train_test_split_groups( test, train_size=preliminary_size, - stratify=test["nancy"], + groups=test["case_id"], random_state=random_state, ) From bc716689dfb7f489e026f94ab258c7444d34dd29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 11 Feb 2026 13:13:25 +0000 Subject: [PATCH 37/42] feat: configs --- configs/data/processed/ftn.yaml | 5 ----- configs/data/processed/ikem.yaml | 5 ----- configs/data/processed/knl_patos.yaml | 5 ----- configs/dataset/processed/ftn.yaml | 5 +++++ configs/dataset/processed/ikem.yaml | 5 +++++ configs/dataset/processed/knl_patos.yaml | 5 +++++ preprocessing/quality_control.py | 4 ++-- scripts/preprocessing/quality_control.py | 6 ++---- 8 files changed, 19 insertions(+), 21 deletions(-) delete mode 100644 configs/data/processed/ftn.yaml delete mode 100644 configs/data/processed/ikem.yaml delete mode 100644 configs/data/processed/knl_patos.yaml create mode 100644 configs/dataset/processed/ftn.yaml create mode 100644 configs/dataset/processed/ikem.yaml create mode 100644 configs/dataset/processed/knl_patos.yaml diff --git a/configs/data/processed/ftn.yaml b/configs/data/processed/ftn.yaml deleted file mode 100644 index 7c2d21a..0000000 --- a/configs/data/processed/ftn.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# @package _global_ - -dataset: - institution: ftn - uri: mlflow-artifacts:/86/142642fc780f4a96800c691168b5c2c3/artifacts/dataset.csv # TODO update URI \ No newline at end of file diff --git a/configs/data/processed/ikem.yaml b/configs/data/processed/ikem.yaml deleted file mode 100644 index 9e84997..0000000 --- a/configs/data/processed/ikem.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# @package _global_ - -dataset: - institution: ikem - uri: mlflow-artifacts:/86/52d0081d60ba4585a16a3bd341d5ab09/artifacts/dataset.csv # TODO update URI \ No newline at end of file diff --git a/configs/data/processed/knl_patos.yaml b/configs/data/processed/knl_patos.yaml deleted file mode 100644 index afbd976..0000000 --- a/configs/data/processed/knl_patos.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# @package _global_ - -dataset: - institution: knl_patos - uri: mlflow-artifacts:/86/afbcfd43cb3c4fd0b1e9b5dbe7327d91/artifacts/dataset.csv # TODO update URI \ No newline at end of file diff --git a/configs/dataset/processed/ftn.yaml b/configs/dataset/processed/ftn.yaml new file mode 100644 index 0000000..ff790d9 --- /dev/null +++ b/configs/dataset/processed/ftn.yaml @@ -0,0 +1,5 @@ +defaults: + - /dataset/raw/ftn@_here_ + - _self_ + +uri: mlflow-artifacts:/86/142642fc780f4a96800c691168b5c2c3/artifacts/dataset.csv # TODO update URI \ No newline at end of file diff --git a/configs/dataset/processed/ikem.yaml b/configs/dataset/processed/ikem.yaml new file mode 100644 index 0000000..0bb446d --- /dev/null +++ b/configs/dataset/processed/ikem.yaml @@ -0,0 +1,5 @@ +defaults: + - /dataset/raw/ikem@_here_ + - _self_ + +uri: mlflow-artifacts:/86/52d0081d60ba4585a16a3bd341d5ab09/artifacts/dataset.csv # TODO update URI \ No newline at end of file diff --git a/configs/dataset/processed/knl_patos.yaml b/configs/dataset/processed/knl_patos.yaml new file mode 100644 index 0000000..0ac6069 --- /dev/null +++ b/configs/dataset/processed/knl_patos.yaml @@ -0,0 +1,5 @@ +defaults: + - /dataset/raw/knl_patos@_here_ + - _self_ + +uri: mlflow-artifacts:/86/afbcfd43cb3c4fd0b1e9b5dbe7327d91/artifacts/dataset.csv # TODO update URI \ No newline at end of file diff --git a/preprocessing/quality_control.py b/preprocessing/quality_control.py index caf1fa7..58c630e 100644 --- a/preprocessing/quality_control.py +++ b/preprocessing/quality_control.py @@ -91,7 +91,7 @@ async def qc_main( logger.log_artifacts(local_dir=output_path) -def download_dataframe(uri: str) -> pd.DataFrame: +def download_dataset(uri: str) -> pd.DataFrame: path = mlflow.artifacts.download_artifacts(artifact_uri=uri) df = pd.read_csv(path) return df @@ -101,7 +101,7 @@ def download_dataframe(uri: str) -> pd.DataFrame: @hydra.main(config_path="../configs", config_name="preprocessing", version_base=None) @autolog def main(config: DictConfig, logger: MLFlowLogger) -> None: - df = download_dataframe(config.dataset.uri) + df = download_dataset(config.dataset.uri) output_path = Path(config.output_dir) output_path.mkdir(parents=True, exist_ok=True) diff --git a/scripts/preprocessing/quality_control.py b/scripts/preprocessing/quality_control.py index cdb974e..39fdded 100644 --- a/scripts/preprocessing/quality_control.py +++ b/scripts/preprocessing/quality_control.py @@ -1,10 +1,8 @@ from kube_jobs import storage, submit_job -COHORT = "ikem" # "ikem", "ftn", or "knl_patos" - submit_job( - job_name=f"ulcerative-colitis-quality-control-{COHORT.replace('_', '-')}", + job_name="ulcerative-colitis-quality-control-...", username=..., public=False, cpu=2, @@ -13,7 +11,7 @@ "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", "cd workdir", "uv sync --frozen", - f"uv run -m preprocessing.quality_control +data=processed/{COHORT}", + "uv run -m preprocessing.quality_control +dataset=processed/...", ], storage=[storage.secure.DATA], ) From 30d4494ef6548d6ed18e551d7f0ecc60d02a0c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 18:18:53 +0000 Subject: [PATCH 38/42] feat: configs --- configs/data/processed_w_masks/ftn.yaml | 7 ------- configs/data/processed_w_masks/ikem.yaml | 7 ------- configs/data/processed_w_masks/knl_patos.yaml | 7 ------- configs/dataset/processed_w_masks/ftn.yaml | 6 ++++++ configs/dataset/processed_w_masks/ikem.yaml | 6 ++++++ configs/dataset/processed_w_masks/knl_patos.yaml | 6 ++++++ configs/experiment/.gitkeep | 0 configs/experiment/preprocessing/tiling/ftn_0_320px.yaml | 2 +- configs/experiment/preprocessing/tiling/ftn_0_75um.yaml | 2 +- configs/experiment/preprocessing/tiling/ftn_1_224px.yaml | 2 +- configs/experiment/preprocessing/tiling/ftn_2_224px.yaml | 2 +- configs/experiment/preprocessing/tiling/ikem_0_320px.yaml | 2 +- configs/experiment/preprocessing/tiling/ikem_0_75um.yaml | 2 +- configs/experiment/preprocessing/tiling/ikem_1_224px.yaml | 2 +- configs/experiment/preprocessing/tiling/ikem_2_224px.yaml | 2 +- .../experiment/preprocessing/tiling/knl_patos_0_320px.yaml | 2 +- .../experiment/preprocessing/tiling/knl_patos_0_75um.yaml | 2 +- .../experiment/preprocessing/tiling/knl_patos_1_224px.yaml | 2 +- .../experiment/preprocessing/tiling/knl_patos_2_224px.yaml | 2 +- configs/preprocessing/tiling.yaml | 6 ++++-- preprocessing/tiling.py | 4 +--- scripts/preprocessing/tiling.py | 7 ++----- 22 files changed, 37 insertions(+), 43 deletions(-) delete mode 100644 configs/data/processed_w_masks/ftn.yaml delete mode 100644 configs/data/processed_w_masks/ikem.yaml delete mode 100644 configs/data/processed_w_masks/knl_patos.yaml create mode 100644 configs/dataset/processed_w_masks/ftn.yaml create mode 100644 configs/dataset/processed_w_masks/ikem.yaml create mode 100644 configs/dataset/processed_w_masks/knl_patos.yaml delete mode 100644 configs/experiment/.gitkeep diff --git a/configs/data/processed_w_masks/ftn.yaml b/configs/data/processed_w_masks/ftn.yaml deleted file mode 100644 index 57903cf..0000000 --- a/configs/data/processed_w_masks/ftn.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# @package _global_ - -dataset: - institution: ftn - uri: mlflow-artifacts:/86/142642fc780f4a96800c691168b5c2c3/artifacts/dataset.csv # TODO update URI - tissue_mask_uri: mlflow-artifacts:/86/04778b10de254572b69ce0a101c1eee4/artifacts/tissue_masks # TODO update URI - qc_mask_uri: mlflow-artifacts:/86/c8edfb2541e84b44b1a28be3540c1a35/artifacts # TODO update URI \ No newline at end of file diff --git a/configs/data/processed_w_masks/ikem.yaml b/configs/data/processed_w_masks/ikem.yaml deleted file mode 100644 index 476e1a3..0000000 --- a/configs/data/processed_w_masks/ikem.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# @package _global_ - -dataset: - institution: ikem - uri: mlflow-artifacts:/86/52d0081d60ba4585a16a3bd341d5ab09/artifacts/dataset.csv # TODO update URI - tissue_mask_uri: mlflow-artifacts:/86/13359cdd5d1a47ddabc352b9aa0d7635/artifacts/tissue_masks # TODO update URI - qc_mask_uri: mlflow-artifacts:/86/98443fe2b67445d5a56598bff15b7f27/artifacts # TODO update URI \ No newline at end of file diff --git a/configs/data/processed_w_masks/knl_patos.yaml b/configs/data/processed_w_masks/knl_patos.yaml deleted file mode 100644 index 67bf0d8..0000000 --- a/configs/data/processed_w_masks/knl_patos.yaml +++ /dev/null @@ -1,7 +0,0 @@ -# @package _global_ - -dataset: - institution: knl_patos - uri: mlflow-artifacts:/86/afbcfd43cb3c4fd0b1e9b5dbe7327d91/artifacts/dataset.csv # TODO update URI - tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks # TODO update URI - qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts # TODO update URI \ No newline at end of file diff --git a/configs/dataset/processed_w_masks/ftn.yaml b/configs/dataset/processed_w_masks/ftn.yaml new file mode 100644 index 0000000..06d6d14 --- /dev/null +++ b/configs/dataset/processed_w_masks/ftn.yaml @@ -0,0 +1,6 @@ +defaults: + - /dataset/processed/ftn@_here_ + - _self_ + +tissue_mask_uri: mlflow-artifacts:/86/04778b10de254572b69ce0a101c1eee4/artifacts/tissue_masks # TODO update URI +qc_mask_uri: mlflow-artifacts:/86/c8edfb2541e84b44b1a28be3540c1a35/artifacts # TODO update URI \ No newline at end of file diff --git a/configs/dataset/processed_w_masks/ikem.yaml b/configs/dataset/processed_w_masks/ikem.yaml new file mode 100644 index 0000000..db24eee --- /dev/null +++ b/configs/dataset/processed_w_masks/ikem.yaml @@ -0,0 +1,6 @@ +defaults: + - /dataset/processed/ikem@_here_ + - _self_ + +tissue_mask_uri: mlflow-artifacts:/86/13359cdd5d1a47ddabc352b9aa0d7635/artifacts/tissue_masks # TODO update URI +qc_mask_uri: mlflow-artifacts:/86/98443fe2b67445d5a56598bff15b7f27/artifacts # TODO update URI \ No newline at end of file diff --git a/configs/dataset/processed_w_masks/knl_patos.yaml b/configs/dataset/processed_w_masks/knl_patos.yaml new file mode 100644 index 0000000..1320558 --- /dev/null +++ b/configs/dataset/processed_w_masks/knl_patos.yaml @@ -0,0 +1,6 @@ +defaults: + - /dataset/processed/knl_patos@_here_ + - _self_ + +tissue_mask_uri: mlflow-artifacts:/86/8ef6d6f0c9af4f35a087596960f675aa/artifacts/tissue_masks # TODO update URI +qc_mask_uri: mlflow-artifacts:/86/75fc3e53112f4634ae5238777d87e88c/artifacts # TODO update URI \ No newline at end of file diff --git a/configs/experiment/.gitkeep b/configs/experiment/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml b/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml index ba2308a..04c7a63 100644 --- a/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_0_320px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/ftn + - /dataset/processed_w_masks/ftn@dataset - _self_ mpp: 0.17 # level 0 diff --git a/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml b/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml index 00c2764..bd090c4 100644 --- a/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_0_75um.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/ftn + - /dataset/processed_w_masks/ftn@dataset - _self_ mpp: 0.17 # level 0 diff --git a/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml b/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml index aa6ade0..d06650b 100644 --- a/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_1_224px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/ftn + - /dataset/processed_w_masks/ftn@dataset - _self_ mpp: 0.52 # level 1 diff --git a/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml b/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml index 3dbc290..1867e1e 100644 --- a/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml +++ b/configs/experiment/preprocessing/tiling/ftn_2_224px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/ftn + - /dataset/processed_w_masks/ftn@dataset - _self_ mpp: 1.55 # level 2 diff --git a/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml b/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml index 36b74cb..d626600 100644 --- a/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_0_320px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/ikem + - /dataset/processed_w_masks/ikem@dataset - _self_ mpp: 0.17 # level 0 diff --git a/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml b/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml index d249001..cce219a 100644 --- a/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_0_75um.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/ikem + - /dataset/processed_w_masks/ikem@dataset - _self_ mpp: 0.17 # level 0 diff --git a/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml b/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml index 49aa860..ff0cdae 100644 --- a/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_1_224px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/ikem + - /dataset/processed_w_masks/ikem@dataset - _self_ mpp: 0.52 # level 1 diff --git a/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml b/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml index b0d0e36..e4a4b2c 100644 --- a/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml +++ b/configs/experiment/preprocessing/tiling/ikem_2_224px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/ikem + - /dataset/processed_w_masks/ikem@dataset - _self_ mpp: 1.55 # level 2 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml index 60764af..adb97b3 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_0_320px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/knl_patos + - /dataset/processed_w_masks/knl_patos@dataset - _self_ mpp: 0.17 # level 0 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml b/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml index 2d84218..3f848ba 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_0_75um.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/knl_patos + - /dataset/processed_w_masks/knl_patos@dataset - _self_ mpp: 0.17 # level 0 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml index 51992fd..0734769 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_1_224px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/knl_patos + - /dataset/processed_w_masks/knl_patos@dataset - _self_ mpp: 0.52 # level 1 diff --git a/configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml b/configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml index fec9d84..81af660 100644 --- a/configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml +++ b/configs/experiment/preprocessing/tiling/knl_patos_2_224px.yaml @@ -1,7 +1,7 @@ # @package _global_ defaults: - - /data/processed_w_masks/knl_patos + - /dataset/processed_w_masks/knl_patos@dataset - _self_ mpp: 1.55 # level 2 diff --git a/configs/preprocessing/tiling.yaml b/configs/preprocessing/tiling.yaml index b6ddec3..1840882 100644 --- a/configs/preprocessing/tiling.yaml +++ b/configs/preprocessing/tiling.yaml @@ -11,8 +11,10 @@ splits: test_final: ??? metadata: - run_name: "🧱 Tiling: ${dataset.institution} ${tile_extent}" - description: Tile extraction for ${dataset.institution} institution with tile extent ${tile_extent} + run_name: "🧱 Tiling: ${tile_extent}" + # run_name: "🧱 Tiling: ${dataset.institution} ${tile_extent}" + description: Tile extraction for institution with tile extent ${tile_extent} + # description: Tile extraction for ${dataset.institution} institution with tile extent ${tile_extent} hyperparams: mpp: ${mpp} tile_extent: ${tile_extent} diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index d2178d1..42f9662 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -20,9 +20,6 @@ from sklearn.model_selection import GroupShuffleSplit -ray.init(runtime_env={"excludes": [".git", ".venv"]}) - - QC_SUBFOLDERS = {"blur": "blur_per_pixel", "artifacts": "artifacts_per_pixel"} @@ -289,4 +286,5 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: if __name__ == "__main__": + ray.init(runtime_env={"excludes": [".git", ".venv"]}) main() diff --git a/scripts/preprocessing/tiling.py b/scripts/preprocessing/tiling.py index cbc3bdd..85572d7 100644 --- a/scripts/preprocessing/tiling.py +++ b/scripts/preprocessing/tiling.py @@ -1,11 +1,8 @@ from kube_jobs import storage, submit_job -COHORT = "ikem" # "ikem", "ftn", or "knl_patos" -TILE_EXTENT = "224px" # "224px", "320px", or "75um" - submit_job( - job_name=f"ulcerative-colitis-tiling-{COHORT.replace('_', '-')}-{TILE_EXTENT}", + job_name="ulcerative-colitis-tiling-...", username=..., public=False, cpu=64, @@ -15,7 +12,7 @@ "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", "cd workdir", "uv sync --frozen", - f"uv run --active -m preprocessing.tiling +experiment=preprocessing/tiling/{COHORT}_{TILE_EXTENT}", + "uv run --active -m preprocessing.tiling +experiment=preprocessing/tiling/...", ], storage=[storage.secure.DATA], ) From 88eb5dca5f67be904f30169a4b5f4c1645d7ee2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 18:21:46 +0000 Subject: [PATCH 39/42] fix: conf --- configs/preprocessing/tiling.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/configs/preprocessing/tiling.yaml b/configs/preprocessing/tiling.yaml index 1840882..b6ddec3 100644 --- a/configs/preprocessing/tiling.yaml +++ b/configs/preprocessing/tiling.yaml @@ -11,10 +11,8 @@ splits: test_final: ??? metadata: - run_name: "🧱 Tiling: ${tile_extent}" - # run_name: "🧱 Tiling: ${dataset.institution} ${tile_extent}" - description: Tile extraction for institution with tile extent ${tile_extent} - # description: Tile extraction for ${dataset.institution} institution with tile extent ${tile_extent} + run_name: "🧱 Tiling: ${dataset.institution} ${tile_extent}" + description: Tile extraction for ${dataset.institution} institution with tile extent ${tile_extent} hyperparams: mpp: ${mpp} tile_extent: ${tile_extent} From 4269a3a29ece358ea2b7d2b0aa6e8c403693ab73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 20:23:12 +0000 Subject: [PATCH 40/42] fix: PR --- preprocessing/tiling.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/preprocessing/tiling.py b/preprocessing/tiling.py index 42f9662..081024e 100644 --- a/preprocessing/tiling.py +++ b/preprocessing/tiling.py @@ -20,6 +20,8 @@ from sklearn.model_selection import GroupShuffleSplit +QC_BLUR_MEAN_COLUMN = "mean_coverage(Piqe)" +QC_ARTIFACTS_MEAN_COLUMN = "mean_coverage(ResidualArtifactsAndCoverage)" QC_SUBFOLDERS = {"blur": "blur_per_pixel", "artifacts": "artifacts_per_pixel"} @@ -90,7 +92,7 @@ def split_dataset( return train, test_preliminary, test_final -def nancy(row: dict[str, Any], df: pd.DataFrame) -> dict[str, Any]: +def add_nancy_index(row: dict[str, Any], df: pd.DataFrame) -> dict[str, Any]: row["nancy_index"] = df.loc[Path(row["path"]).stem, "nancy"] return row @@ -98,8 +100,8 @@ def nancy(row: dict[str, Any], df: pd.DataFrame) -> dict[str, Any]: def qc_agg(row: dict[str, Any], df: pd.DataFrame) -> dict[str, Any]: qc_df = cast("pd.Series", df.loc[Path(row["path"]).stem]) - row["blur_mean"] = qc_df["mean_coverage(Piqe)"] - row["artifacts_mean"] = qc_df["mean_coverage(ResidualArtifactsAndCoverage)"] + row["blur_mean"] = qc_df[QC_BLUR_MEAN_COLUMN] + row["artifacts_mean"] = qc_df[QC_ARTIFACTS_MEAN_COLUMN] return row @@ -191,7 +193,7 @@ def tiling( slides = ( read_slides(paths, tile_extent=tile_extent, stride=stride, mpp=mpp) .map(row_hash, **LO_CPU, **LO_MEM) - .map(nancy, fn_args=(df,), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] + .map(add_nancy_index, fn_args=(df,), **LO_CPU, **LO_MEM) # type: ignore[reportArgumentType] .map(qc_agg, fn_args=(qc_df,), **HI_CPU, **LO_MEM) # type: ignore[reportArgumentType] ) From a9615ed1fc98a18d6b28ab84199d119831e71495 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 20:30:19 +0000 Subject: [PATCH 41/42] fix: repo --- scripts/preprocessing/quality_control.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/preprocessing/quality_control.py b/scripts/preprocessing/quality_control.py index 39fdded..dcb56b6 100644 --- a/scripts/preprocessing/quality_control.py +++ b/scripts/preprocessing/quality_control.py @@ -8,7 +8,7 @@ cpu=2, memory="4Gi", script=[ - "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", + "git clone https://github.com/RationAI/ulcerative-colitis.git workdir", "cd workdir", "uv sync --frozen", "uv run -m preprocessing.quality_control +dataset=processed/...", From 659910e53b6db15a9f7ed4a53740974aedeb3eb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 20:31:13 +0000 Subject: [PATCH 42/42] fix: repo --- scripts/preprocessing/tiling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/preprocessing/tiling.py b/scripts/preprocessing/tiling.py index 85572d7..b52d38f 100644 --- a/scripts/preprocessing/tiling.py +++ b/scripts/preprocessing/tiling.py @@ -9,7 +9,7 @@ memory="128Gi", shm="48Gi", script=[ - "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", + "git clone https://github.com/RationAI/ulcerative-colitis.git workdir", "cd workdir", "uv sync --frozen", "uv run --active -m preprocessing.tiling +experiment=preprocessing/tiling/...",