diff --git a/configs/dataset/tiled/ftn/0_320.yaml b/configs/dataset/tiled/ftn/0_320.yaml new file mode 100644 index 0000000..ff91a77 --- /dev/null +++ b/configs/dataset/tiled/ftn/0_320.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ftn@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 320 +level: 0 +tiling_uris: + train: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ftn/0_430.yaml b/configs/dataset/tiled/ftn/0_430.yaml new file mode 100644 index 0000000..79b9c8e --- /dev/null +++ b/configs/dataset/tiled/ftn/0_430.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ftn@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 430 +level: 0 +tiling_uris: + train: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ftn/1_224.yaml b/configs/dataset/tiled/ftn/1_224.yaml new file mode 100644 index 0000000..91a27da --- /dev/null +++ b/configs/dataset/tiled/ftn/1_224.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ftn@_here_ + - _self_ + +mpp: 0.52 +tile_extent: 224 +level: 1 +tiling_uris: + train: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/test preliminary - ftn" # TODO update URI + test_final: 
"mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ftn/2_224.yaml b/configs/dataset/tiled/ftn/2_224.yaml new file mode 100644 index 0000000..55da441 --- /dev/null +++ b/configs/dataset/tiled/ftn/2_224.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ftn@_here_ + - _self_ + +mpp: 1.55 +tile_extent: 224 +level: 2 +tiling_uris: + train: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ikem/0_320.yaml b/configs/dataset/tiled/ikem/0_320.yaml new file mode 100644 index 0000000..065bca1 --- /dev/null +++ b/configs/dataset/tiled/ikem/0_320.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ikem@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 320 +level: 0 +tiling_uris: + train: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ikem/0_430.yaml b/configs/dataset/tiled/ikem/0_430.yaml new file mode 100644 index 0000000..98c8ff1 --- /dev/null +++ b/configs/dataset/tiled/ikem/0_430.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ikem@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 430 +level: 0 +tiling_uris: + train: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/train - ikem" # TODO update URI + test_preliminary: 
"mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ikem/1_224.yaml b/configs/dataset/tiled/ikem/1_224.yaml new file mode 100644 index 0000000..0b40c0e --- /dev/null +++ b/configs/dataset/tiled/ikem/1_224.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ikem@_here_ + - _self_ + +mpp: 0.52 +tile_extent: 224 +level: 1 +tiling_uris: + train: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ikem/2_224.yaml b/configs/dataset/tiled/ikem/2_224.yaml new file mode 100644 index 0000000..c68e542 --- /dev/null +++ b/configs/dataset/tiled/ikem/2_224.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ikem@_here_ + - _self_ + +mpp: 1.55 +tile_extent: 224 +level: 2 +tiling_uris: + train: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/knl_patos/0_320.yaml b/configs/dataset/tiled/knl_patos/0_320.yaml new file mode 100644 index 0000000..a84731d --- /dev/null +++ b/configs/dataset/tiled/knl_patos/0_320.yaml @@ -0,0 +1,10 @@ +defaults: + - /dataset/processed_w_masks/knl_patos@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 320 
+level: 0 +tiling_uris: + test_preliminary: "mlflow-artifacts:/86/7b9a446145b14965981bbac88e8e2c8b/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/7b9a446145b14965981bbac88e8e2c8b/artifacts/test final - knl_patos" # TODO update URI diff --git a/configs/dataset/tiled/knl_patos/0_430.yaml b/configs/dataset/tiled/knl_patos/0_430.yaml new file mode 100644 index 0000000..0e6bb2c --- /dev/null +++ b/configs/dataset/tiled/knl_patos/0_430.yaml @@ -0,0 +1,10 @@ +defaults: + - /dataset/processed_w_masks/knl_patos@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 430 +level: 0 +tiling_uris: + test_preliminary: "mlflow-artifacts:/86/eb29255c944d4dad926160a7cb102ad9/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/eb29255c944d4dad926160a7cb102ad9/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/knl_patos/1_224.yaml b/configs/dataset/tiled/knl_patos/1_224.yaml new file mode 100644 index 0000000..71e47d9 --- /dev/null +++ b/configs/dataset/tiled/knl_patos/1_224.yaml @@ -0,0 +1,10 @@ +defaults: + - /dataset/processed_w_masks/knl_patos@_here_ + - _self_ + +mpp: 0.52 +tile_extent: 224 +level: 1 +tiling_uris: + test_preliminary: "mlflow-artifacts:/86/6782155362d54ecc9f1beccb4362d359/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/6782155362d54ecc9f1beccb4362d359/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/knl_patos/2_224.yaml b/configs/dataset/tiled/knl_patos/2_224.yaml new file mode 100644 index 0000000..b814d37 --- /dev/null +++ b/configs/dataset/tiled/knl_patos/2_224.yaml @@ -0,0 +1,10 @@ +defaults: + - /dataset/processed_w_masks/knl_patos@_here_ + - _self_ + +mpp: 1.55 +tile_extent: 224 +level: 2 +tiling_uris: + test_preliminary: "mlflow-artifacts:/86/d7486bb6b667433989c3ce1c8ce31d60/artifacts/test preliminary - 
knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/d7486bb6b667433989c3ce1c8ce31d60/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml new file mode 100644 index 0000000..ae09076 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ftn/1_224@dataset + - /preprocessing/tile_encoder/prov_gigapath + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml new file mode 100644 index 0000000..aa5eddb --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ftn/1_224@dataset + - /preprocessing/tile_encoder/uni + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml new file mode 100644 index 0000000..d212ea2 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ftn/1_224@dataset + - /preprocessing/tile_encoder/uni2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml new file mode 100644 index 0000000..76c9ea9 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ftn/1_224@dataset + - /preprocessing/tile_encoder/virchow + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml 
b/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml new file mode 100644 index 0000000..02e1b1b --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ftn/1_224@dataset + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml new file mode 100644 index 0000000..f88b565 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ftn/2_224@dataset + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml new file mode 100644 index 0000000..4343f54 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ikem/1_224@dataset + - /preprocessing/tile_encoder/prov_gigapath + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml new file mode 100644 index 0000000..0687e5d --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ikem/1_224@dataset + - /preprocessing/tile_encoder/uni + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml new file mode 100644 index 0000000..0aed9a5 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - 
/dataset/tiled/ikem/1_224@dataset + - /preprocessing/tile_encoder/uni2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml new file mode 100644 index 0000000..9d1c211 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ikem/1_224@dataset + - /preprocessing/tile_encoder/virchow + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml new file mode 100644 index 0000000..e4ddf27 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ikem/1_224@dataset + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml new file mode 100644 index 0000000..d43c5dd --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/ikem/2_224@dataset + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml new file mode 100644 index 0000000..66c2fcd --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/knl_patos/1_224@dataset + - /preprocessing/tile_encoder/prov_gigapath + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml 
b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml new file mode 100644 index 0000000..f3730ac --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/knl_patos/1_224@dataset + - /preprocessing/tile_encoder/uni + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml new file mode 100644 index 0000000..ae236ab --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/knl_patos/1_224@dataset + - /preprocessing/tile_encoder/uni2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml new file mode 100644 index 0000000..67688a1 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/knl_patos/1_224@dataset + - /preprocessing/tile_encoder/virchow + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml new file mode 100644 index 0000000..a8cc0ab --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/knl_patos/1_224@dataset + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml new file mode 100644 index 0000000..8e3dcde --- /dev/null +++ 
b/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /dataset/tiled/knl_patos/2_224@dataset + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml new file mode 100644 index 0000000..6dacc98 --- /dev/null +++ b/configs/preprocessing/embeddings.yaml @@ -0,0 +1,17 @@ +# @package _global_ + +output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level}/${tile_encoder.name} + +dataloader: + batch_size: 2048 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb + num_workers: 8 + persistent_workers: True + +metadata: + run_name: '📂 Embeddings: ${dataset.institution} - ${dataset.level} - ${tile_encoder.name}' + description: Embeddings preprocessing for ${dataset.institution} institution with ${tile_encoder.name} tile encoder. + hyperparams: + tile_encoder: ${tile_encoder.name} + level: ${dataset.level} + batch_size: ${dataloader.batch_size} + num_workers: ${dataloader.num_workers} \ No newline at end of file diff --git a/configs/preprocessing/tile_encoder/prov_gigapath.yaml b/configs/preprocessing/tile_encoder/prov_gigapath.yaml new file mode 100644 index 0000000..a40583e --- /dev/null +++ b/configs/preprocessing/tile_encoder/prov_gigapath.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.ProvGigaPath + name: prov_gigapath diff --git a/configs/preprocessing/tile_encoder/uni.yaml b/configs/preprocessing/tile_encoder/uni.yaml new file mode 100644 index 0000000..c3db256 --- /dev/null +++ b/configs/preprocessing/tile_encoder/uni.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.UNI + name: uni diff --git a/configs/preprocessing/tile_encoder/uni2.yaml b/configs/preprocessing/tile_encoder/uni2.yaml new file mode 100644 index 0000000..9febe23 --- /dev/null +++ 
b/configs/preprocessing/tile_encoder/uni2.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.UNI2 + name: uni2 diff --git a/configs/preprocessing/tile_encoder/virchow.yaml b/configs/preprocessing/tile_encoder/virchow.yaml new file mode 100644 index 0000000..69ff16e --- /dev/null +++ b/configs/preprocessing/tile_encoder/virchow.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.Virchow + name: virchow diff --git a/configs/preprocessing/tile_encoder/virchow2.yaml b/configs/preprocessing/tile_encoder/virchow2.yaml new file mode 100644 index 0000000..4952488 --- /dev/null +++ b/configs/preprocessing/tile_encoder/virchow2.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.Virchow2 + name: virchow2 diff --git a/project_name/__init__.py b/ml/__init__.py similarity index 100% rename from project_name/__init__.py rename to ml/__init__.py diff --git a/project_name/__main__.py b/ml/__main__.py similarity index 90% rename from project_name/__main__.py rename to ml/__main__.py index 035ecc3..6252c56 100644 --- a/project_name/__main__.py +++ b/ml/__main__.py @@ -6,8 +6,8 @@ from omegaconf import DictConfig, OmegaConf from rationai.mlkit import Trainer, autolog -from project_name.data import DataModule -from project_name.project_name_model import ProjectNameModel +from ml.data import DataModule +from ml.project_name_model import ProjectNameModel OmegaConf.register_new_resolver( diff --git a/project_name/data/README.md b/ml/data/README.md similarity index 100% rename from project_name/data/README.md rename to ml/data/README.md diff --git a/ml/data/__init__.py b/ml/data/__init__.py new file mode 100644 index 0000000..e7058ee --- /dev/null +++ b/ml/data/__init__.py @@ -0,0 +1,4 @@ +from ml.data.data_module import DataModule + + +__all__ = ["DataModule"] diff --git a/project_name/data/data_module.py b/ml/data/data_module.py similarity index 88% rename 
from project_name/data/data_module.py rename to ml/data/data_module.py index d3b8e06..f0690ff 100644 --- a/project_name/data/data_module.py +++ b/ml/data/data_module.py @@ -5,7 +5,8 @@ from omegaconf import DictConfig from torch.utils.data import DataLoader -from project_name.typing import Input + +# from ml.typing import Input TODO class DataModule(LightningDataModule): @@ -27,7 +28,7 @@ def setup(self, stage: str) -> None: case "test": self.test = instantiate(self.datasets["test"]) - def train_dataloader(self) -> Iterable[Input]: + def train_dataloader(self) -> Iterable: return DataLoader( self.train, batch_size=self.batch_size, @@ -37,7 +38,7 @@ def train_dataloader(self) -> Iterable[Input]: persistent_workers=self.num_workers > 0, ) - def val_dataloader(self) -> Iterable[Input]: + def val_dataloader(self) -> Iterable: return DataLoader( self.val, batch_size=self.batch_size, @@ -45,7 +46,7 @@ def val_dataloader(self) -> Iterable[Input]: persistent_workers=self.num_workers > 0, ) - def test_dataloader(self) -> Iterable[Input]: + def test_dataloader(self) -> Iterable: return DataLoader( self.test, batch_size=self.batch_size, num_workers=self.num_workers ) diff --git a/ml/data/datasets/__init__.py b/ml/data/datasets/__init__.py new file mode 100644 index 0000000..4163d98 --- /dev/null +++ b/ml/data/datasets/__init__.py @@ -0,0 +1,4 @@ +from ml.data.datasets.tiles import Tiles, TilesPredict + + +__all__ = ["Tiles", "TilesPredict"] diff --git a/ml/data/datasets/labels.py b/ml/data/datasets/labels.py new file mode 100644 index 0000000..67b3509 --- /dev/null +++ b/ml/data/datasets/labels.py @@ -0,0 +1,68 @@ +from enum import Enum +from pathlib import Path + +import pandas as pd +import torch + + +class LabelMode(Enum): + NEUTROPHILS = "neutrophils" + NANCY_HIGH = "nancy_high" + NANCY_LOW = "nancy_low" + ULCERATION = "ulceration" + NANCY_HIGH_ALL = "nancy_high_all" + NANCY_LOW_ALL = "nancy_low_all" + ULCERATION_ALL = "ulceration_all" + + +def process_slides(slides: 
pd.DataFrame, mode: LabelMode | None = None) -> pd.DataFrame: + slides = slides.copy() + match mode: + case LabelMode.NEUTROPHILS: + slides["neutrophils"] = slides["nancy_index"] >= 2 + case LabelMode.NANCY_LOW: + slides = slides[slides["nancy_index"] < 2].copy() + case LabelMode.NANCY_HIGH: + slides = slides[slides["nancy_index"] >= 2].copy() + slides["nancy_index"] -= 2 + case LabelMode.ULCERATION: + slides = slides[slides["nancy_index"] >= 2].copy() + slides["ulceration"] = slides["nancy_index"] == 4 + case LabelMode.NANCY_HIGH_ALL: + # new labels: 0,1 -> 0; 2,3,4 -> 1,2,3 + slides["nancy_index"] = slides["nancy_index"].apply(lambda x: max(0, x - 1)) + case LabelMode.NANCY_LOW_ALL: + # new labels: 0,1 -> 0,1; 2,3,4 -> 2 + slides["nancy_index"] = slides["nancy_index"].apply(lambda x: min(x, 2)) + case LabelMode.ULCERATION_ALL: + slides["ulceration"] = slides["nancy_index"] == 4 + + slides["name"] = slides["path"].apply(lambda x: Path(x).stem) + return slides + + +def get_label(slide_metadata: pd.Series, mode: LabelMode) -> torch.Tensor: + match mode: + case LabelMode.NEUTROPHILS: + return torch.tensor(slide_metadata["neutrophils"].item()).float() + case LabelMode.NANCY_LOW: + return torch.tensor(slide_metadata["nancy_index"].item()).float() + case LabelMode.NANCY_HIGH | LabelMode.NANCY_HIGH_ALL | LabelMode.NANCY_LOW_ALL: + return torch.tensor(slide_metadata["nancy_index"].item()).long() + case LabelMode.ULCERATION | LabelMode.ULCERATION_ALL: + return torch.tensor(slide_metadata["ulceration"].item()).float() + + +def get_target_column(mode: LabelMode) -> str: + match mode: + case LabelMode.NEUTROPHILS: + return "neutrophils" + case ( + LabelMode.NANCY_LOW + | LabelMode.NANCY_HIGH + | LabelMode.NANCY_HIGH_ALL + | LabelMode.NANCY_LOW_ALL + ): + return "nancy_index" + case LabelMode.ULCERATION | LabelMode.ULCERATION_ALL: + return "ulceration" diff --git a/ml/data/datasets/tiles.py b/ml/data/datasets/tiles.py new file mode 100644 index 0000000..d2f06fc --- /dev/null 
+++ b/ml/data/datasets/tiles.py @@ -0,0 +1,111 @@ +from collections.abc import Iterable +from typing import Generic, TypeVar + +import pandas as pd +from albumentations.core.composition import TransformType +from albumentations.pytorch import ToTensorV2 +from rationai.mlkit.data.datasets import MetaTiledSlides, OpenSlideTilesDataset +from torch.utils.data import Dataset + +from ml.data.datasets.labels import LabelMode, get_label, process_slides +from ml.typing import MetadataTiles, TilesPredictSample, TilesSample + + +T = TypeVar("T", bound=TilesSample | TilesPredictSample) + + +class _Tiles(Dataset[T], Generic[T]): + def __init__( + self, + slide_metadata: pd.Series, + tiles: pd.DataFrame, + mode: LabelMode | str | None, + include_labels: bool = True, + transforms: TransformType | None = None, + ) -> None: + super().__init__() + self.slide_tiles = OpenSlideTilesDataset( + slide_path=slide_metadata["path"], + level=slide_metadata["level"], + tile_extent_x=slide_metadata["tile_extent_x"], + tile_extent_y=slide_metadata["tile_extent_y"], + tiles=tiles, + ) + self.slide_metadata = slide_metadata + self.mode = LabelMode(mode) if mode is not None else None + self.include_labels = include_labels + self.transforms = transforms + self.to_tensor = ToTensorV2() + + if self.include_labels and self.mode is None: + raise ValueError("Mode must be specified if labels are included.") + + def __len__(self) -> int: + return len(self.slide_tiles) + + def __getitem__(self, idx: int) -> TilesSample | TilesPredictSample: + image = self.slide_tiles[idx] + metadata = MetadataTiles( + slide_id=self.slide_tiles.slide_path.stem, + x=self.slide_tiles.tiles.iloc[idx]["x"], + y=self.slide_tiles.tiles.iloc[idx]["y"], + ) + + if self.transforms is not None: + image = self.transforms(image=image)["image"] + + image = self.to_tensor(image=image)["image"] + if not self.include_labels: + return image, metadata + + assert self.mode is not None, "Mode must be specified for labels." 
+ label = get_label(self.slide_metadata, self.mode) + return image, label, metadata + + +class Tiles(MetaTiledSlides[TilesSample]): + def __init__( + self, + uris: Iterable[str] | str, + mode: LabelMode | str, + transforms: TransformType | None = None, + ) -> None: + self.transforms = transforms + self.mode = LabelMode(mode) + super().__init__(uris=(uris,) if isinstance(uris, str) else uris) + + def generate_datasets(self) -> Iterable[_Tiles[TilesSample]]: + self.slides = process_slides(self.slides, self.mode) + return ( + _Tiles( + slide_metadata=slide, + tiles=self.filter_tiles_by_slide(slide["id"]), + mode=self.mode, + include_labels=True, + transforms=self.transforms, + ) + for _, slide in self.slides.iterrows() + ) + + +class TilesPredict(MetaTiledSlides[TilesPredictSample]): + def __init__( + self, + uris: Iterable[str] | str, + transforms: TransformType | None = None, + ) -> None: + self.transforms = transforms + super().__init__(uris=(uris,) if isinstance(uris, str) else uris) + + def generate_datasets(self) -> Iterable[_Tiles[TilesPredictSample]]: + self.slides = process_slides(self.slides) + return ( + _Tiles( + slide_metadata=slide, + tiles=self.filter_tiles_by_slide(slide["id"]), + mode=None, + include_labels=False, + transforms=self.transforms, + ) + for _, slide in self.slides.iterrows() + ) diff --git a/project_name/modeling/README.md b/ml/modeling/README.md similarity index 100% rename from project_name/modeling/README.md rename to ml/modeling/README.md diff --git a/project_name/modeling/__init__.py b/ml/modeling/__init__.py similarity index 100% rename from project_name/modeling/__init__.py rename to ml/modeling/__init__.py diff --git a/project_name/project_name_model.py b/ml/project_name_model.py similarity index 97% rename from project_name/project_name_model.py rename to ml/project_name_model.py index 85e50bc..403260d 100644 --- a/project_name/project_name_model.py +++ b/ml/project_name_model.py @@ -3,7 +3,7 @@ from torch.optim.optimizer import 
Optimizer from torchmetrics import MetricCollection -from project_name.typing import Input, Outputs +from ml.typing import Input, Outputs class ProjectNameModel(LightningModule): diff --git a/ml/typing.py b/ml/typing.py new file mode 100644 index 0000000..f2f26fc --- /dev/null +++ b/ml/typing.py @@ -0,0 +1,38 @@ +from pathlib import Path +from typing import TypeAlias, TypedDict + +import pandas as pd +from torch import Tensor + + +class Metadata(TypedDict): + slide_id: str + + +class MetadataTiles(Metadata): + x: int + y: int + + +TilesSample: TypeAlias = tuple[Tensor, Tensor, MetadataTiles] +TilesPredictSample: TypeAlias = tuple[Tensor, MetadataTiles] + + +class MetadataTileEmbeddings(Metadata): + slide_name: str + slide_path: Path + level: int + tile_extent_x: int + tile_extent_y: int + tiles: pd.DataFrame + x: Tensor # Tensor[int] + y: Tensor # Tensor[int] + + +TileEmbeddingsSample: TypeAlias = tuple[Tensor, Tensor, MetadataTileEmbeddings] +TileEmbeddingsPredictSample: TypeAlias = tuple[Tensor, MetadataTileEmbeddings] + +TileEmbeddingsInput: TypeAlias = tuple[Tensor, Tensor, list[MetadataTileEmbeddings]] +TileEmbeddingsPredictInput: TypeAlias = tuple[Tensor, list[MetadataTileEmbeddings]] + +Output: TypeAlias = Tensor diff --git a/preprocessing/embeddings.py b/preprocessing/embeddings.py new file mode 100644 index 0000000..b1fe722 --- /dev/null +++ b/preprocessing/embeddings.py @@ -0,0 +1,241 @@ +import os +from collections.abc import Iterable +from pathlib import Path +from typing import cast + +import albumentations as A +import hydra +import pandas as pd +import timm +import torch +from huggingface_hub import login +from omegaconf import DictConfig +from rationai.mlkit import autolog, with_cli_args +from rationai.mlkit.lightning.loggers import MLFlowLogger +from timm.layers.mlp import SwiGLUPacked +from torch.utils.data import DataLoader +from tqdm import tqdm + +from ml.data.datasets import TilesPredict + + +class FoundationModel(torch.nn.Module): + def 
__init__(self, name: str, embed_dim: int) -> None: + """Wrapper for a foundation model - forward and dimension differ depending on the model.""" + super().__init__() + self.embed_dim = embed_dim + + +class ProvGigaPath(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 1536) + # For this, you need to setup HF_TOKEN= env.variable. + self.module = timm.create_model( + "hf-hub:prov-gigapath/prov-gigapath", pretrained=True + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +class Virchow(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 2560) + + # For this, you need to setup HF_TOKEN= env.variable. + self.module = timm.create_model( + "hf-hub:paige-ai/Virchow", + pretrained=True, + mlp_layer=SwiGLUPacked, + act_layer=torch.nn.SiLU, + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output = self.module(x) + + class_token = output[:, 0] + patch_tokens = output[:, 1:] + return torch.cat([class_token, patch_tokens.mean(1)], dim=-1) + + +class Virchow2(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 2560) + + # For this, you need to setup HF_TOKEN= env.variable. + self.module = timm.create_model( + "hf-hub:paige-ai/Virchow2", + pretrained=True, + mlp_layer=SwiGLUPacked, + act_layer=torch.nn.SiLU, + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output = self.module(x) # size: B x 261 x 1280 + + class_token = output[:, 0] # size: B x 1280 + patch_tokens = output[ + :, 5: + ] # size: B x 256 x 1280, tokens 1-4 are register tokens so we ignore those + + # concatenate class token and average pool of patch tokens + return torch.cat([class_token, patch_tokens.mean(1)], dim=-1) # size: B x 2560 + + +class UNI(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 1024) + + # For this, you need to setup HF_TOKEN= env.variable. 
+ + self.module = timm.create_model( + "hf-hub:MahmoodLab/uni", + pretrained=True, + init_values=1e-5, + dynamic_img_size=True, + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +class UNI2(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 1536) + + # For this, you need to setup HF_TOKEN= env.variable. + self.module = timm.create_model( + "hf-hub:MahmoodLab/UNI2-h", + pretrained=True, + img_size=224, + patch_size=14, + depth=24, + num_heads=24, + init_values=1e-5, + embed_dim=1536, + mlp_ratio=2.66667 * 2, + num_classes=0, + no_embed_class=True, + mlp_layer=SwiGLUPacked, + act_layer=torch.nn.SiLU, + reg_tokens=8, + dynamic_img_size=True, + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +def load_dataset(uris: Iterable[str]) -> TilesPredict: + """Load the dataset for tile embeddings. + + Assumes that the dataset has 224x224 RGB tiles. + + Args: + uris (Iterable[str]): The URIs of the tiles. + + Returns: + TilesPredict: The dataset object for tile embeddings. + """ + return TilesPredict( + uris, + transforms=A.Compose( + [ + A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), + ] + ), + ) + + +def save_embeddings( + slide_tiles_embeddings: torch.Tensor, + slide_tiles_x: torch.Tensor, + slide_tiles_y: torch.Tensor, + embeddings_path: Path, +) -> None: + """Save the slide embeddings to the specified path. + + Args: + slide_tiles_embeddings (torch.Tensor): The embeddings to save. + slide_tiles_x (torch.Tensor): The x-coordinates of the tiles. + slide_tiles_y (torch.Tensor): The y-coordinates of the tiles. + embeddings_path (Path): The path to save the embeddings to. 
+ """ + df = pd.DataFrame( + { + "x": slide_tiles_x.numpy(), + "y": slide_tiles_y.numpy(), + "embedding": [emb.numpy() for emb in slide_tiles_embeddings], + } + ) + + df.to_parquet(embeddings_path, index=False, engine="pyarrow") + + +@with_cli_args(["+preprocessing=embeddings"]) +@hydra.main(config_path="../configs", config_name="preprocessing", version_base=None) +@autolog +def main(config: DictConfig, logger: MLFlowLogger) -> None: + hf_token = os.getenv("HF_TOKEN") + if not hf_token: + raise ValueError( + "Hugging Face token not found. Please set the HF_TOKEN environment variable." + ) + login(token=hf_token) + dest = Path(config.output_dir) + dest.mkdir(parents=True, exist_ok=True) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + tile_encoder: FoundationModel = hydra.utils.instantiate(config.tile_encoder) + tile_encoder = tile_encoder.to(device) + + with torch.no_grad(): + dataset = load_dataset(config.dataset.tiling_uris.values()) + + for slide_dataset in tqdm(dataset.generate_datasets()): + slide_name = str(slide_dataset.slide_metadata["name"]) + embeddings_path = (dest / slide_name).with_suffix(".parquet") + + if embeddings_path.exists(): + print(f"Embeddings for slide {slide_name} already exist, skipping...") + continue + + try: + slide_tiles_dataloader = DataLoader( + slide_dataset, + batch_size=config.dataloader.batch_size, + num_workers=config.dataloader.num_workers, + persistent_workers=config.dataloader.persistent_workers + and config.dataloader.num_workers > 0, + ) + slide_tiles_embeddings = torch.zeros( + (len(slide_dataset), tile_encoder.embed_dim), dtype=torch.float32 + ) + slide_tiles_x = torch.zeros((len(slide_dataset),), dtype=torch.int32) + slide_tiles_y = torch.zeros((len(slide_dataset),), dtype=torch.int32) + + for i, (x, metadata) in enumerate(slide_tiles_dataloader): + x = x.to(device) + embeddings = cast("torch.Tensor", tile_encoder(x)) + + start = i * config.dataloader.batch_size + end = start + embeddings.size(0) 
+ + slide_tiles_embeddings[start:end] = embeddings.to("cpu") + slide_tiles_x[start:end] = metadata["x"].to("cpu") + slide_tiles_y[start:end] = metadata["y"].to("cpu") + + save_embeddings( + slide_tiles_embeddings, + slide_tiles_x, + slide_tiles_y, + embeddings_path, + ) + + except Exception as e: + print(f"Error processing slide {slide_name}: {e}") + + logger.log_artifacts(str(dest), artifact_path="embeddings") + + +if __name__ == "__main__": + main() diff --git a/project_name/data/__init__.py b/project_name/data/__init__.py deleted file mode 100644 index cee3ad5..0000000 --- a/project_name/data/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from project_name.data.data_module import DataModule - - -__all__ = ["DataModule"] diff --git a/project_name/typing.py b/project_name/typing.py deleted file mode 100644 index af3f90a..0000000 --- a/project_name/typing.py +++ /dev/null @@ -1,8 +0,0 @@ -from typing import TypeAlias - - -Sample: TypeAlias = ... # TODO define the type returned by Dataset - -Input: TypeAlias = ... # TODO define the model input type - -Outputs: TypeAlias = ... 
# TODO define the model output type diff --git a/pyproject.toml b/pyproject.toml index fb8fc1d..df8c892 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "torch>=2.9.0", "torchmetrics>=1.8.2", "ratiopath>=1.1.1", + "timm>=1.0.24", ] [dependency-groups] diff --git a/scripts/preprocessing/embeddings.py b/scripts/preprocessing/embeddings.py new file mode 100644 index 0000000..d066178 --- /dev/null +++ b/scripts/preprocessing/embeddings.py @@ -0,0 +1,19 @@ +from kube_jobs import storage, submit_job + + +submit_job( + job_name="ulcerative-colitis-embeddings-...", + username=..., + public=False, + cpu=16, + memory="32Gi", + gpu="H100", + script=[ + "git clone https://github.com/RationAI/ulcerative-colitis.git workdir", + "cd workdir", + "export HF_TOKEN=...", + "uv sync --frozen", + "uv run -m preprocessing.embeddings +experiment=...", + ], + storage=[storage.secure.DATA, storage.secure.PROJECTS], +) diff --git a/uv.lock b/uv.lock index 37eaf3f..3e8bb11 100644 --- a/uv.lock +++ b/uv.lock @@ -684,6 +684,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "hf-xet" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", 
size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" }, + { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" }, + { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" }, + { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" }, + { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" }, + { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" }, + { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -712,6 
+727,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[[package]] +name = "huggingface-hub" +version = "1.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "shellingham" }, + { name = "tqdm" }, + { name = "typer-slim" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/25/74af9d16cd59ae15b12467a79a84aa0fe24be4aba68fc4da0c1864d49c17/huggingface_hub-1.3.4.tar.gz", hash = "sha256:c20d5484a611b7b7891d272e8fc9f77d5de025b0480bdacfa858efb3780b455f", size = 627683, upload-time = "2026-01-26T14:05:10.656Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/07/3d0c34c345043c6a398a5882e196b2220dc5861adfa18322448b90908f26/huggingface_hub-1.3.4-py3-none-any.whl", hash = "sha256:a0c526e76eb316e96a91e8a1a7a93cf66b0dd210be1a17bd5fc5ae53cba76bfd", size = 536611, upload-time = "2026-01-26T14:05:08.549Z" }, +] + [[package]] name = "hydra-core" version = "1.3.2" @@ -2229,6 +2265,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] +[[package]] +name = "safetensors" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = 
"https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, + { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, + { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, + { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, + { url = 
"https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, + { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, + { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, + { url = 
"https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, +] + [[package]] name = "scikit-image" version = "0.26.0" @@ -2324,6 +2382,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = 
"2023-10-24T04:13:38.866Z" }, +] + [[package]] name = "simsimd" version = "6.5.12" @@ -2486,6 +2553,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/4d/3fd60d3a37b544cb59463add86e4dfbb485880225115341281906a7b140e/tifffile-2026.1.14-py3-none-any.whl", hash = "sha256:29cf4adb43562a4624fc959018ab1b44e0342015d3db4581b983fe40e05f5924", size = 232213, upload-time = "2026-01-14T22:40:41.553Z" }, ] +[[package]] +name = "timm" +version = "1.0.24" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "pyyaml" }, + { name = "safetensors" }, + { name = "torch" }, + { name = "torchvision" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f4/9d/0ea45640be447445c8664ce2b10c74f763b0b0b9ed11620d41a4d4baa10c/timm-1.0.24.tar.gz", hash = "sha256:c7b909f43fe2ef8fe62c505e270cd4f1af230dfbc37f2ee93e3608492b9d9a40", size = 2412239, upload-time = "2026-01-07T00:26:17.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/dd/c1f5b0890f7b5db661bde0864b41cb0275be76851047e5f7e085fe0b455a/timm-1.0.24-py3-none-any.whl", hash = "sha256:8301ac783410c6ad72c73c49326af6d71a9e4d1558238552796e825c2464913f", size = 2560563, upload-time = "2026-01-07T00:26:13.956Z" }, +] + [[package]] name = "torch" version = "2.10.0" @@ -2539,6 +2622,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/21/aa0f434434c48490f91b65962b1ce863fdcce63febc166ca9fe9d706c2b6/torchmetrics-1.8.2-py3-none-any.whl", hash = "sha256:08382fd96b923e39e904c4d570f3d49e2cc71ccabd2a94e0f895d1f0dac86242", size = 983161, upload-time = "2025-09-03T14:00:51.921Z" }, ] +[[package]] +name = "torchvision" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "pillow" }, + { name = "torch" }, +] +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/56/3a/6ea0d73f49a9bef38a1b3a92e8dd455cea58470985d25635beab93841748/torchvision-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2abe430c90b1d5e552680037d68da4eb80a5852ebb1c811b2b89d299b10573b", size = 1874920, upload-time = "2026-01-21T16:27:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/51/f8/c0e1ef27c66e15406fece94930e7d6feee4cb6374bbc02d945a630d6426e/torchvision-0.25.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b75deafa2dfea3e2c2a525559b04783515e3463f6e830cb71de0fb7ea36fe233", size = 2344556, upload-time = "2026-01-21T16:27:40.125Z" }, + { url = "https://files.pythonhosted.org/packages/68/2f/f24b039169db474e8688f649377de082a965fbf85daf4e46c44412f1d15a/torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f25aa9e380865b11ea6e9d99d84df86b9cc959f1a007cd966fc6f1ab2ed0e248", size = 8072351, upload-time = "2026-01-21T16:27:21.074Z" }, + { url = "https://files.pythonhosted.org/packages/ad/16/8f650c2e288977cf0f8f85184b90ee56ed170a4919347fc74ee99286ed6f/torchvision-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9c55ae8d673ab493325d1267cbd285bb94d56f99626c00ac4644de32a59ede3", size = 4303059, upload-time = "2026-01-21T16:27:11.08Z" }, +] + [[package]] name = "tornado" version = "6.5.4" @@ -2587,6 +2686,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, ] +[[package]] +name = "typer-slim" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478, upload-time = "2026-01-06T11:21:11.176Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444, upload-time = "2026-01-06T11:21:12.441Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -2635,6 +2747,7 @@ dependencies = [ { name = "rationai-tiling" }, { name = "ratiopath" }, { name = "ray" }, + { name = "timm" }, { name = "torch" }, { name = "torchmetrics" }, ] @@ -2664,6 +2777,7 @@ requires-dist = [ { name = "rationai-tiling", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" }, { name = "ratiopath", specifier = ">=1.1.1" }, { name = "ray", specifier = ">=2.52.1" }, + { name = "timm", specifier = ">=1.0.24" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, ]