From fb33c976c6e524f617ca38f2c4bb6c4fcc2480c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 18:20:26 +0000 Subject: [PATCH 01/24] chore: rename project folder to ml --- {project_name => ml}/__init__.py | 0 {project_name => ml}/__main__.py | 4 ++-- {project_name => ml}/data/README.md | 0 ml/data/__init__.py | 4 ++++ {project_name => ml}/data/data_module.py | 2 +- {project_name => ml}/modeling/README.md | 0 {project_name => ml}/modeling/__init__.py | 0 {project_name => ml}/project_name_model.py | 2 +- {project_name => ml}/typing.py | 0 project_name/data/__init__.py | 4 ---- 10 files changed, 8 insertions(+), 8 deletions(-) rename {project_name => ml}/__init__.py (100%) rename {project_name => ml}/__main__.py (90%) rename {project_name => ml}/data/README.md (100%) create mode 100644 ml/data/__init__.py rename {project_name => ml}/data/data_module.py (97%) rename {project_name => ml}/modeling/README.md (100%) rename {project_name => ml}/modeling/__init__.py (100%) rename {project_name => ml}/project_name_model.py (97%) rename {project_name => ml}/typing.py (100%) delete mode 100644 project_name/data/__init__.py diff --git a/project_name/__init__.py b/ml/__init__.py similarity index 100% rename from project_name/__init__.py rename to ml/__init__.py diff --git a/project_name/__main__.py b/ml/__main__.py similarity index 90% rename from project_name/__main__.py rename to ml/__main__.py index 035ecc3..6252c56 100644 --- a/project_name/__main__.py +++ b/ml/__main__.py @@ -6,8 +6,8 @@ from omegaconf import DictConfig, OmegaConf from rationai.mlkit import Trainer, autolog -from project_name.data import DataModule -from project_name.project_name_model import ProjectNameModel +from ml.data import DataModule +from ml.project_name_model import ProjectNameModel OmegaConf.register_new_resolver( diff --git a/project_name/data/README.md b/ml/data/README.md similarity index 100% rename from project_name/data/README.md rename to ml/data/README.md diff --git a/ml/data/__init__.py b/ml/data/__init__.py new file mode 100644 index 0000000..e7058ee --- /dev/null +++ b/ml/data/__init__.py @@ -0,0 +1,4 @@ +from ml.data.data_module import DataModule + + +__all__ = ["DataModule"] diff --git a/project_name/data/data_module.py b/ml/data/data_module.py similarity index 97% rename from project_name/data/data_module.py rename to ml/data/data_module.py index d3b8e06..f803ac5 100644 --- a/project_name/data/data_module.py +++ b/ml/data/data_module.py @@ -5,7 +5,7 @@ from omegaconf import DictConfig from torch.utils.data import DataLoader -from project_name.typing import Input +from ml.typing import Input class DataModule(LightningDataModule): diff --git a/project_name/modeling/README.md b/ml/modeling/README.md similarity index 100% rename from project_name/modeling/README.md rename to ml/modeling/README.md diff --git a/project_name/modeling/__init__.py b/ml/modeling/__init__.py similarity index 100% rename from project_name/modeling/__init__.py rename to ml/modeling/__init__.py diff --git a/project_name/project_name_model.py b/ml/project_name_model.py similarity index 97% rename from project_name/project_name_model.py rename to ml/project_name_model.py index 85e50bc..403260d 100644 --- a/project_name/project_name_model.py +++ b/ml/project_name_model.py @@ -3,7 +3,7 @@ from torch.optim.optimizer import Optimizer from torchmetrics import MetricCollection -from project_name.typing import Input, Outputs +from ml.typing import Input, Outputs class ProjectNameModel(LightningModule): diff --git a/project_name/typing.py b/ml/typing.py similarity index 100% rename from project_name/typing.py rename to ml/typing.py diff --git a/project_name/data/__init__.py b/project_name/data/__init__.py deleted file mode 100644 index cee3ad5..0000000 --- a/project_name/data/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from project_name.data.data_module import DataModule - - -__all__ = ["DataModule"] From 81f4f06ad23c5e1c42e85b7035d7c40f96787843 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 18:58:24 +0000 Subject: [PATCH 02/24] chore: dependencies --- configs/experiment/.gitkeep | 0 pyproject.toml | 1 + uv.lock | 114 ++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) delete mode 100644 configs/experiment/.gitkeep diff --git a/configs/experiment/.gitkeep b/configs/experiment/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/pyproject.toml b/pyproject.toml index 6933a1f..390052a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "torch>=2.9.0", "torchmetrics>=1.8.2", "ratiopath>=1.0.4", + "timm>=1.0.24", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index 352dc6b..f265df8 100644 --- a/uv.lock +++ b/uv.lock @@ -684,6 +684,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, ] +[[package]] +name = "hf-xet" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5e/6e/0f11bacf08a67f7fb5ee09740f2ca54163863b07b70d579356e9222ce5d8/hf_xet-1.2.0.tar.gz", hash = "sha256:a8c27070ca547293b6890c4bf389f713f80e8c478631432962bb7f4bc0bd7d7f", size = 506020, upload-time = "2025-10-24T19:04:32.129Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:46740d4ac024a7ca9b22bebf77460ff43332868b661186a8e46c227fdae01848", size = 2866099, upload-time = "2025-10-24T19:04:15.366Z" }, + { url = "https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:27df617a076420d8845bea087f59303da8be17ed7ec0cd7ee3b9b9f579dff0e4", size = 2722178, upload-time = "2025-10-24T19:04:13.695Z" }, + { url = "https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3651fd5bfe0281951b988c0facbe726aa5e347b103a675f49a3fa8144c7968fd", size = 3320214, upload-time = "2025-10-24T19:04:03.596Z" }, + { url = "https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d06fa97c8562fb3ee7a378dd9b51e343bc5bc8190254202c9771029152f5e08c", size = 3229054, upload-time = "2025-10-24T19:04:01.949Z" }, + { url = "https://files.pythonhosted.org/packages/0b/dd/7ac658d54b9fb7999a0ccb07ad863b413cbaf5cf172f48ebcd9497ec7263/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:4c1428c9ae73ec0939410ec73023c4f842927f39db09b063b9482dac5a3bb737", size = 3413812, upload-time = "2025-10-24T19:04:24.585Z" }, + { url = "https://files.pythonhosted.org/packages/92/68/89ac4e5b12a9ff6286a12174c8538a5930e2ed662091dd2572bbe0a18c8a/hf_xet-1.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a55558084c16b09b5ed32ab9ed38421e2d87cf3f1f89815764d1177081b99865", size = 3508920, upload-time = "2025-10-24T19:04:26.927Z" }, + { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735, upload-time = "2025-10-24T19:04:35.928Z" }, +] + [[package]] name = "httpcore" version = "1.0.9" @@ -712,6 +727,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, ] +[[package]] +name = "huggingface-hub" +version = "1.3.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "hf-xet", marker = "platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'" }, + { name = "httpx" }, + { name = "packaging" }, + { name = "pyyaml" }, + { name = "shellingham" }, + { name = "tqdm" }, + { name = "typer-slim" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/af/25/74af9d16cd59ae15b12467a79a84aa0fe24be4aba68fc4da0c1864d49c17/huggingface_hub-1.3.4.tar.gz", hash = "sha256:c20d5484a611b7b7891d272e8fc9f77d5de025b0480bdacfa858efb3780b455f", size = 627683, upload-time = "2026-01-26T14:05:10.656Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/55/07/3d0c34c345043c6a398a5882e196b2220dc5861adfa18322448b90908f26/huggingface_hub-1.3.4-py3-none-any.whl", hash = "sha256:a0c526e76eb316e96a91e8a1a7a93cf66b0dd210be1a17bd5fc5ae53cba76bfd", size = 536611, upload-time = "2026-01-26T14:05:08.549Z" }, +] + [[package]] name = "hydra-core" version = "1.3.2" @@ -2229,6 +2265,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] +[[package]] +name = "safetensors" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/29/9c/6e74567782559a63bd040a236edca26fd71bc7ba88de2ef35d75df3bca5e/safetensors-0.7.0.tar.gz", hash = "sha256:07663963b67e8bd9f0b8ad15bb9163606cd27cc5a1b96235a50d8369803b96b0", size = 200878, upload-time = "2025-11-19T15:18:43.199Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/47/aef6c06649039accf914afef490268e1067ed82be62bcfa5b7e886ad15e8/safetensors-0.7.0-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:c82f4d474cf725255d9e6acf17252991c3c8aac038d6ef363a4bf8be2f6db517", size = 467781, upload-time = "2025-11-19T15:18:35.84Z" }, + { url = "https://files.pythonhosted.org/packages/e8/00/374c0c068e30cd31f1e1b46b4b5738168ec79e7689ca82ee93ddfea05109/safetensors-0.7.0-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:94fd4858284736bb67a897a41608b5b0c2496c9bdb3bf2af1fa3409127f20d57", size = 447058, upload-time = "2025-11-19T15:18:34.416Z" }, + { url = "https://files.pythonhosted.org/packages/f1/06/578ffed52c2296f93d7fd2d844cabfa92be51a587c38c8afbb8ae449ca89/safetensors-0.7.0-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e07d91d0c92a31200f25351f4acb2bc6aff7f48094e13ebb1d0fb995b54b6542", size = 491748, upload-time = "2025-11-19T15:18:09.79Z" }, + { url = "https://files.pythonhosted.org/packages/ae/33/1debbbb70e4791dde185edb9413d1fe01619255abb64b300157d7f15dddd/safetensors-0.7.0-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8469155f4cb518bafb4acf4865e8bb9d6804110d2d9bdcaa78564b9fd841e104", size = 503881, upload-time = "2025-11-19T15:18:16.145Z" }, + { url = "https://files.pythonhosted.org/packages/8e/1c/40c2ca924d60792c3be509833df711b553c60effbd91da6f5284a83f7122/safetensors-0.7.0-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:54bef08bf00a2bff599982f6b08e8770e09cc012d7bba00783fc7ea38f1fb37d", size = 623463, upload-time = "2025-11-19T15:18:21.11Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3a/13784a9364bd43b0d61eef4bea2845039bc2030458b16594a1bd787ae26e/safetensors-0.7.0-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:42cb091236206bb2016d245c377ed383aa7f78691748f3bb6ee1bfa51ae2ce6a", size = 532855, upload-time = "2025-11-19T15:18:25.719Z" }, + { url = "https://files.pythonhosted.org/packages/a0/60/429e9b1cb3fc651937727befe258ea24122d9663e4d5709a48c9cbfceecb/safetensors-0.7.0-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac7252938f0696ddea46f5e855dd3138444e82236e3be475f54929f0c510d48", size = 507152, upload-time = "2025-11-19T15:18:33.023Z" }, + { url = "https://files.pythonhosted.org/packages/3c/a8/4b45e4e059270d17af60359713ffd83f97900d45a6afa73aaa0d737d48b6/safetensors-0.7.0-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1d060c70284127fa805085d8f10fbd0962792aed71879d00864acda69dbab981", size = 541856, upload-time = "2025-11-19T15:18:31.075Z" }, + { url = "https://files.pythonhosted.org/packages/06/87/d26d8407c44175d8ae164a95b5a62707fcc445f3c0c56108e37d98070a3d/safetensors-0.7.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:cdab83a366799fa730f90a4ebb563e494f28e9e92c4819e556152ad55e43591b", size = 674060, upload-time = "2025-11-19T15:18:37.211Z" }, + { url = "https://files.pythonhosted.org/packages/11/f5/57644a2ff08dc6325816ba7217e5095f17269dada2554b658442c66aed51/safetensors-0.7.0-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:672132907fcad9f2aedcb705b2d7b3b93354a2aec1b2f706c4db852abe338f85", size = 771715, upload-time = "2025-11-19T15:18:38.689Z" }, + { url = "https://files.pythonhosted.org/packages/86/31/17883e13a814bd278ae6e266b13282a01049b0c81341da7fd0e3e71a80a3/safetensors-0.7.0-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:5d72abdb8a4d56d4020713724ba81dac065fedb7f3667151c4a637f1d3fb26c0", size = 714377, upload-time = "2025-11-19T15:18:40.162Z" }, + { url = "https://files.pythonhosted.org/packages/4a/d8/0c8a7dc9b41dcac53c4cbf9df2b9c83e0e0097203de8b37a712b345c0be5/safetensors-0.7.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b0f6d66c1c538d5a94a73aa9ddca8ccc4227e6c9ff555322ea40bdd142391dd4", size = 677368, upload-time = "2025-11-19T15:18:41.627Z" }, + { url = "https://files.pythonhosted.org/packages/05/e5/cb4b713c8a93469e3c5be7c3f8d77d307e65fe89673e731f5c2bfd0a9237/safetensors-0.7.0-cp38-abi3-win32.whl", hash = "sha256:c74af94bf3ac15ac4d0f2a7c7b4663a15f8c2ab15ed0fc7531ca61d0835eccba", size = 326423, upload-time = "2025-11-19T15:18:45.74Z" }, + { url = "https://files.pythonhosted.org/packages/5d/e6/ec8471c8072382cb91233ba7267fd931219753bb43814cbc71757bfd4dab/safetensors-0.7.0-cp38-abi3-win_amd64.whl", hash = "sha256:d1239932053f56f3456f32eb9625590cc7582e905021f94636202a864d470755", size = 341380, upload-time = "2025-11-19T15:18:44.427Z" }, +] + [[package]] name = "scikit-image" version = "0.26.0" @@ -2324,6 +2382,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ec/bf/cb6c1c505cb31e818e900b9312d514f381fbfa5c4363edfce0fcc4f8c1a4/shapely-2.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:743044b4cfb34f9a67205cee9279feaf60ba7d02e69febc2afc609047cb49179", size = 1722861, upload-time = "2025-09-24T13:50:43.35Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + [[package]] name = "simsimd" version = "6.5.12" @@ -2486,6 +2553,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a3/4d/3fd60d3a37b544cb59463add86e4dfbb485880225115341281906a7b140e/tifffile-2026.1.14-py3-none-any.whl", hash = "sha256:29cf4adb43562a4624fc959018ab1b44e0342015d3db4581b983fe40e05f5924", size = 232213, upload-time = "2026-01-14T22:40:41.553Z" }, ] +[[package]] +name = "timm" +version = "1.0.24" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "huggingface-hub" }, + { name = "pyyaml" }, + { name = "safetensors" }, + { name = "torch" }, + { name = "torchvision" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f4/9d/0ea45640be447445c8664ce2b10c74f763b0b0b9ed11620d41a4d4baa10c/timm-1.0.24.tar.gz", hash = "sha256:c7b909f43fe2ef8fe62c505e270cd4f1af230dfbc37f2ee93e3608492b9d9a40", size = 2412239, upload-time = "2026-01-07T00:26:17.541Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/92/dd/c1f5b0890f7b5db661bde0864b41cb0275be76851047e5f7e085fe0b455a/timm-1.0.24-py3-none-any.whl", hash = "sha256:8301ac783410c6ad72c73c49326af6d71a9e4d1558238552796e825c2464913f", size = 2560563, upload-time = "2026-01-07T00:26:13.956Z" }, +] + [[package]] name = "torch" version = "2.10.0" @@ -2538,6 +2621,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/02/21/aa0f434434c48490f91b65962b1ce863fdcce63febc166ca9fe9d706c2b6/torchmetrics-1.8.2-py3-none-any.whl", hash = "sha256:08382fd96b923e39e904c4d570f3d49e2cc71ccabd2a94e0f895d1f0dac86242", size = 983161, upload-time = "2025-09-03T14:00:51.921Z" }, ] +[[package]] +name = "torchvision" +version = "0.25.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "pillow" }, + { name = "torch" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/3a/6ea0d73f49a9bef38a1b3a92e8dd455cea58470985d25635beab93841748/torchvision-0.25.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c2abe430c90b1d5e552680037d68da4eb80a5852ebb1c811b2b89d299b10573b", size = 1874920, upload-time = "2026-01-21T16:27:45.348Z" }, + { url = "https://files.pythonhosted.org/packages/51/f8/c0e1ef27c66e15406fece94930e7d6feee4cb6374bbc02d945a630d6426e/torchvision-0.25.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:b75deafa2dfea3e2c2a525559b04783515e3463f6e830cb71de0fb7ea36fe233", size = 2344556, upload-time = "2026-01-21T16:27:40.125Z" }, + { url = "https://files.pythonhosted.org/packages/68/2f/f24b039169db474e8688f649377de082a965fbf85daf4e46c44412f1d15a/torchvision-0.25.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:f25aa9e380865b11ea6e9d99d84df86b9cc959f1a007cd966fc6f1ab2ed0e248", size = 8072351, upload-time = "2026-01-21T16:27:21.074Z" }, + { url = "https://files.pythonhosted.org/packages/ad/16/8f650c2e288977cf0f8f85184b90ee56ed170a4919347fc74ee99286ed6f/torchvision-0.25.0-cp312-cp312-win_amd64.whl", hash = "sha256:f9c55ae8d673ab493325d1267cbd285bb94d56f99626c00ac4644de32a59ede3", size = 4303059, upload-time = "2026-01-21T16:27:11.08Z" }, +] + [[package]] name = "tornado" version = "6.5.4" @@ -2586,6 +2685,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/a8/cdf8b3e4c98132f965f88c2313a4b493266832ad47fb52f23d14d4f86bb5/triton-3.6.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:74caf5e34b66d9f3a429af689c1c7128daba1d8208df60e81106b115c00d6fca", size = 188266850, upload-time = "2026-01-20T16:00:43.041Z" }, ] +[[package]] +name = "typer-slim" +version = "0.21.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478, upload-time = "2026-01-06T11:21:11.176Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444, upload-time = "2026-01-06T11:21:12.441Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -2634,6 +2746,7 @@ dependencies = [ { name = "rationai-tiling" }, { name = "ratiopath" }, { name = "ray" }, + { name = "timm" }, { name = "torch" }, { name = "torchmetrics" }, ] @@ -2663,6 +2776,7 @@ requires-dist = [ { name = "rationai-tiling", git = "https://gitlab.ics.muni.cz/rationai/digital-pathology/libraries/tiling.git" }, { name = "ratiopath", specifier = ">=1.0.4" }, { name = "ray", specifier = ">=2.52.1" }, + { name = "timm", specifier = ">=1.0.24" }, { name = "torch", specifier = ">=2.9.0" }, { name = "torchmetrics", specifier = ">=1.8.2" }, ] From e028c01ee0584c4673ba1fcdbbdfcc2d018a04da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 18:58:58 +0000 Subject: [PATCH 03/24] feat: dataset --- configs/data/tiled/knl_patos/2_224.yaml | 9 ++ ml/data/datasets/__init__.py | 4 + ml/data/datasets/labels.py | 67 ++++++++++++++ ml/data/datasets/tiles.py | 111 ++++++++++++++++++++++++ ml/typing.py | 38 +++++++- 5 files changed, 225 insertions(+), 4 deletions(-) create mode 100644 configs/data/tiled/knl_patos/2_224.yaml create mode 100644 ml/data/datasets/__init__.py create mode 100644 ml/data/datasets/labels.py create mode 100644 ml/data/datasets/tiles.py diff --git a/configs/data/tiled/knl_patos/2_224.yaml b/configs/data/tiled/knl_patos/2_224.yaml new file mode 100644 index 0000000..50e6f52 --- /dev/null +++ b/configs/data/tiled/knl_patos/2_224.yaml @@ -0,0 +1,9 @@ +# @package _global_ + +dataset: + institution: knl_patos + mpp: 1.55 + tile_extent: 224 + uris: + - test_preliminary: ??? + - test_final: ??? \ No newline at end of file diff --git a/ml/data/datasets/__init__.py b/ml/data/datasets/__init__.py new file mode 100644 index 0000000..4163d98 --- /dev/null +++ b/ml/data/datasets/__init__.py @@ -0,0 +1,4 @@ +from ml.data.datasets.tiles import Tiles, TilesPredict + + +__all__ = ["Tiles", "TilesPredict"] diff --git a/ml/data/datasets/labels.py b/ml/data/datasets/labels.py new file mode 100644 index 0000000..df3a270 --- /dev/null +++ b/ml/data/datasets/labels.py @@ -0,0 +1,67 @@ +from enum import Enum +from pathlib import Path + +import pandas as pd +import torch + + +class LabelMode(Enum): + NEUTROPHILS = "neutrophils" + NANCY_HIGH = "nancy_high" + NANCY_LOW = "nancy_low" + ULCERATION = "ulceration" + NANCY_HIGH_ALL = "nancy_high_all" + NANCY_LOW_ALL = "nancy_low_all" + ULCERATION_ALL = "ulceration_all" + + +def process_slides(slides: pd.DataFrame, mode: LabelMode | None = None) -> pd.DataFrame: + match mode: + case LabelMode.NEUTROPHILS: + slides["neutrophils"] = slides["nancy_index"] >= 2 + case LabelMode.NANCY_LOW: + slides = slides[slides["nancy_index"] < 2].copy() + case LabelMode.NANCY_HIGH: + slides = slides[slides["nancy_index"] >= 2].copy() + slides["nancy_index"] -= 2 + case LabelMode.ULCERATION: + slides = slides[slides["nancy_index"] >= 2].copy() + slides["ulceration"] = slides["nancy_index"] == 4 + case LabelMode.NANCY_HIGH_ALL: + # new labels: 0,1 -> 0; 2,3,4 -> 1,2,3 + slides["nancy_index"] = slides["nancy_index"].apply(lambda x: max(0, x - 1)) + case LabelMode.NANCY_LOW_ALL: + # new labels: 0,1 -> 0,1; 2,3,4 -> 2 + slides["nancy_index"] = slides["nancy_index"].apply(lambda x: min(x, 2)) + case LabelMode.ULCERATION_ALL: + slides["ulceration"] = slides["nancy_index"] == 4 + + slides["name"] = slides["path"].apply(lambda x: Path(x).stem) + return slides + + +def get_label(slide_metadata: pd.Series, mode: LabelMode) -> torch.Tensor: + match mode: + case LabelMode.NEUTROPHILS: + return torch.tensor(slide_metadata["neutrophils"].item()).float() + case LabelMode.NANCY_LOW: + return torch.tensor(slide_metadata["nancy_index"].item()).float() + case LabelMode.NANCY_HIGH | LabelMode.NANCY_HIGH_ALL | LabelMode.NANCY_LOW_ALL: + return torch.tensor(slide_metadata["nancy_index"].item()).long() + case LabelMode.ULCERATION | LabelMode.ULCERATION_ALL: + return torch.tensor(slide_metadata["ulceration"].item()).float() + + +def get_target_column(mode: LabelMode) -> str: + match mode: + case LabelMode.NEUTROPHILS: + return "neutrophils" + case ( + LabelMode.NANCY_LOW + | LabelMode.NANCY_HIGH + | LabelMode.NANCY_HIGH_ALL + | LabelMode.NANCY_LOW_ALL + ): + return "nancy_index" + case LabelMode.ULCERATION | LabelMode.ULCERATION_ALL: + return "ulceration" diff --git a/ml/data/datasets/tiles.py b/ml/data/datasets/tiles.py new file mode 100644 index 0000000..d2f06fc --- /dev/null +++ b/ml/data/datasets/tiles.py @@ -0,0 +1,111 @@ +from collections.abc import Iterable +from typing import Generic, TypeVar + +import pandas as pd +from albumentations.core.composition import TransformType +from albumentations.pytorch import ToTensorV2 +from rationai.mlkit.data.datasets import MetaTiledSlides, OpenSlideTilesDataset +from torch.utils.data import Dataset + +from ml.data.datasets.labels import LabelMode, get_label, process_slides +from ml.typing import MetadataTiles, TilesPredictSample, TilesSample + + +T = TypeVar("T", bound=TilesSample | TilesPredictSample) + + +class _Tiles(Dataset[T], Generic[T]): + def __init__( + self, + slide_metadata: pd.Series, + tiles: pd.DataFrame, + mode: LabelMode | str | None, + include_labels: bool = True, + transforms: TransformType | None = None, + ) -> None: + super().__init__() + self.slide_tiles = OpenSlideTilesDataset( + slide_path=slide_metadata["path"], + level=slide_metadata["level"], + tile_extent_x=slide_metadata["tile_extent_x"], + tile_extent_y=slide_metadata["tile_extent_y"], + tiles=tiles, + ) + self.slide_metadata = slide_metadata + self.mode = LabelMode(mode) if mode is not None else None + self.include_labels = include_labels + self.transforms = transforms + self.to_tensor = ToTensorV2() + + if self.include_labels and self.mode is None: + raise ValueError("Mode must be specified if labels are included.") + + def __len__(self) -> int: + return len(self.slide_tiles) + + def __getitem__(self, idx: int) -> TilesSample | TilesPredictSample: + image = self.slide_tiles[idx] + metadata = MetadataTiles( + slide_id=self.slide_tiles.slide_path.stem, + x=self.slide_tiles.tiles.iloc[idx]["x"], + y=self.slide_tiles.tiles.iloc[idx]["y"], + ) + + if self.transforms is not None: + image = self.transforms(image=image)["image"] + + image = self.to_tensor(image=image)["image"] + if not self.include_labels: + return image, metadata + + assert self.mode is not None, "Mode must be specified for labels." + label = get_label(self.slide_metadata, self.mode) + return image, label, metadata + + +class Tiles(MetaTiledSlides[TilesSample]): + def __init__( + self, + uris: Iterable[str] | str, + mode: LabelMode | str, + transforms: TransformType | None = None, + ) -> None: + self.transforms = transforms + self.mode = LabelMode(mode) + super().__init__(uris=(uris,) if isinstance(uris, str) else uris) + + def generate_datasets(self) -> Iterable[_Tiles[TilesSample]]: + self.slides = process_slides(self.slides, self.mode) + return ( + _Tiles( + slide_metadata=slide, + tiles=self.filter_tiles_by_slide(slide["id"]), + mode=self.mode, + include_labels=True, + transforms=self.transforms, + ) + for _, slide in self.slides.iterrows() + ) + + +class TilesPredict(MetaTiledSlides[TilesPredictSample]): + def __init__( + self, + uris: Iterable[str] | str, + transforms: TransformType | None = None, + ) -> None: + self.transforms = transforms + super().__init__(uris=(uris,) if isinstance(uris, str) else uris) + + def generate_datasets(self) -> Iterable[_Tiles[TilesPredictSample]]: + self.slides = process_slides(self.slides) + return ( + _Tiles( + slide_metadata=slide, + tiles=self.filter_tiles_by_slide(slide["id"]), + mode=None, + include_labels=False, + transforms=self.transforms, + ) + for _, slide in self.slides.iterrows() + ) diff --git a/ml/typing.py b/ml/typing.py index af3f90a..f2f26fc 100644 --- a/ml/typing.py +++ b/ml/typing.py @@ -1,8 +1,38 @@ -from typing import TypeAlias +from pathlib import Path +from typing import TypeAlias, TypedDict +import pandas as pd +from torch import Tensor -Sample: TypeAlias = ... # TODO define the type returned by Dataset -Input: TypeAlias = ... # TODO define the model input type +class Metadata(TypedDict): + slide_id: str -Outputs: TypeAlias = ... # TODO define the model output type + +class MetadataTiles(Metadata): + x: int + y: int + + +TilesSample: TypeAlias = tuple[Tensor, Tensor, MetadataTiles] +TilesPredictSample: TypeAlias = tuple[Tensor, MetadataTiles] + + +class MetadataTileEmbeddings(Metadata): + slide_name: str + slide_path: Path + level: int + tile_extent_x: int + tile_extent_y: int + tiles: pd.DataFrame + x: Tensor # Tensor[int] + y: Tensor # Tensor[int] + + +TileEmbeddingsSample: TypeAlias = tuple[Tensor, Tensor, MetadataTileEmbeddings] +TileEmbeddingsPredictSample: TypeAlias = tuple[Tensor, MetadataTileEmbeddings] + +TileEmbeddingsInput: TypeAlias = tuple[Tensor, Tensor, list[MetadataTileEmbeddings]] +TileEmbeddingsPredictInput: TypeAlias = tuple[Tensor, list[MetadataTileEmbeddings]] + +Output: TypeAlias = Tensor From 45b92ea48740716fe1cd544fbdd8fc52c23b69ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Wed, 28 Jan 2026 18:59:12 +0000 Subject: [PATCH 04/24] feat: embeddings --- configs/preprocessing/embeddings.yaml | 0 preprocessing/embeddings.py | 196 ++++++++++++++++++++++++++ 2 files changed, 196 insertions(+) create mode 100644 configs/preprocessing/embeddings.yaml create mode 100644 preprocessing/embeddings.py diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml new file mode 100644 index 0000000..e69de29 diff --git a/preprocessing/embeddings.py b/preprocessing/embeddings.py new file mode 100644 index 0000000..6afc5ff --- /dev/null +++ b/preprocessing/embeddings.py @@ -0,0 +1,196 @@ +import os +from collections.abc import Iterable +from pathlib import Path +from typing import cast + +import albumentations as A +import hydra +import pandas as pd +import timm +import torch +from huggingface_hub import login +from omegaconf import DictConfig +from rationai.mlkit import autolog, with_cli_args +from rationai.mlkit.lightning.loggers import MLFlowLogger +from timm.layers.mlp import SwiGLUPacked +from torch.utils.data import DataLoader +from tqdm import tqdm + +from ml.data.datasets import TilesPredict + + +class FoundationModel(torch.nn.Module): + def __init__(self, name: str, embed_dim: int) -> None: + """Wrapper for a foundation model - forward and dimension differ depending on the model.""" + super().__init__() + self.embed_dim = embed_dim + + +class ProvGigaPath(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 1536) + # For this, you need to setup HF_TOKEN= env.variable. + self.module = timm.create_model( + "hf_hub:prov-gigapath/prov-gigapath", pretrained=True + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +class Virchow2(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 2560) + + # For this, you need to setup HF_TOKEN= env.variable. + self.module = timm.create_model( + "hf-hub:paige-ai/Virchow2", + pretrained=True, + mlp_layer=SwiGLUPacked, + act_layer=torch.nn.SiLU, + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output = self.module(x) # size: B x 261 x 1280 + + class_token = output[:, 0] # size: B x 1280 + patch_tokens = output[ + :, 5: + ] # size: B x 256 x 1280, tokens 1-4 are register tokens so we ignore those + + # concatenate class token and average pool of patch tokens + return torch.cat([class_token, patch_tokens.mean(1)], dim=-1) # size: B x 2560 + + +class UNI2(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 1536) + + # For this, you need to setup HF_TOKEN= env.variable. + self.module = timm.create_model( + "hf-hub:MahmoodLab/UNI2-h", + pretrained=True, + img_size=224, + patch_size=14, + depth=24, + num_heads=24, + init_values=1e-5, + embed_dim=1536, + mlp_ratio=2.66667 * 2, + num_classes=0, + no_embed_class=True, + mlp_layer=SwiGLUPacked, + act_layer=torch.nn.SiLU, + reg_tokens=8, + dynamic_img_size=True, + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + +def load_dataset(uris: Iterable[str]) -> TilesPredict: + """Load the dataset for tile embeddings. + + Assumes that the dataset has 224x224 RGB tiles. + + Args: + uris (Iterable[str]): The URIs of the tiles. + + Returns: + TilesPredict: The dataset object for tile embeddings. + """ + return TilesPredict( + uris, + transforms=A.Compose( + [ + A.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)), + ] + ), + ) + + +def save_embeddings( + slide_tiles_embeddings: torch.Tensor, + slide_tiles_x: torch.Tensor, + slide_tiles_y: torch.Tensor, + embeddings_path: Path, +) -> None: + """Save the slide embeddings to the specified path. + + Args: + slide_tiles_embeddings (torch.Tensor): The embeddings to save. + slide_tiles_x (torch.Tensor): The x-coordinates of the tiles. + slide_tiles_y (torch.Tensor): The y-coordinates of the tiles. + embeddings_path (Path): The path to save the embeddings to. + """ + embeddings_path.parent.mkdir(parents=True, exist_ok=True) + + df = pd.DataFrame( + { + "x": slide_tiles_x.numpy(), + "y": slide_tiles_y.numpy(), + "embedding": [emb.numpy() for emb in slide_tiles_embeddings], + } + ) + + df.to_parquet(embeddings_path, index=False, engine="pyarrow") + + +@with_cli_args(["+preprocessing=embeddings"]) +@hydra.main(config_path="../configs", config_name="preprocessing", version_base=None) +@autolog +def main(config: DictConfig, logger: MLFlowLogger) -> None: + login(token=os.environ["HF_TOKEN"]) + dest = Path(config.output_dir) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + tile_encoder: FoundationModel = hydra.utils.instantiate(config.tile_encoder) + tile_encoder = tile_encoder.to(device) + + with torch.no_grad(): + dataset = load_dataset(config.dataset.uris.values()) + + for slide_dataset in tqdm(dataset.generate_datasets()): + slide_name = str(slide_dataset.slide_metadata["name"]) + embeddings_path = (dest / slide_name).with_suffix(".parquet") + + try: + slide_tiles_dataloader = DataLoader( + slide_dataset, + batch_size=config.dataloader.batch_size, + num_workers=config.dataloader.num_workers, + persistent_workers=config.dataloader.persistent_workers, + ) + slide_tiles_embeddings = torch.zeros( + (len(slide_dataset), tile_encoder.embed_dim), dtype=torch.float32 + ) + slide_tiles_x = torch.zeros((len(slide_dataset),), dtype=torch.int32) + slide_tiles_y = torch.zeros((len(slide_dataset),), dtype=torch.int32) + + for i, (x, metadata) in enumerate(slide_tiles_dataloader): + x = x.to(device) + embeddings = cast("torch.Tensor", tile_encoder(x)) + + start = i * config.dataloader.batch_size + end = start + embeddings.size(0) + + slide_tiles_embeddings[start:end] = embeddings.to("cpu") + slide_tiles_x[start:end] = metadata["x"].to("cpu") + slide_tiles_y[start:end] = metadata["y"].to("cpu") + + save_embeddings( + slide_tiles_embeddings, + slide_tiles_x, + slide_tiles_y, + embeddings_path, + ) + + logger.log_artifact( + local_path=str(embeddings_path), artifact_path="embeddings" + ) + except Exception as e: + print(f"Error processing slide {slide_name}: {e}") + + +if __name__ == "__main__": + main() From 19298040fe6a832b672adcfff5e1c25c5773f3d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sat, 31 Jan 2026 18:46:56 +0000 Subject: [PATCH 05/24] feat: configs --- .../embeddings/knl_patos_1_virchow2.yaml | 6 ++++++ configs/preprocessing/embeddings.yaml | 12 ++++++++++++ .../preprocessing/tile_encoder/prov_gigapath.yaml | 5 +++++ configs/preprocessing/tile_encoder/uni2.yaml | 5 +++++ configs/preprocessing/tile_encoder/virchow2.yaml | 5 +++++ 5 files changed, 33 insertions(+) create mode 100644 configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml create mode 100644 configs/preprocessing/tile_encoder/prov_gigapath.yaml create mode 100644 configs/preprocessing/tile_encoder/uni2.yaml create mode 100644 configs/preprocessing/tile_encoder/virchow2.yaml diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml new file mode 100644 index 0000000..f3d82f2 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/knl_patos/1_224 + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index e69de29..19cb5be 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -0,0 +1,12 @@ +# @package _global_ + +output_dir: ${project_dir}/embeddings/${dataset.institution} + +dataloader: + batch_size: 2048 + num_workers: 8 + persistent_workers: True + +metadata: + run_name: '📂 Embeddings: ${dataset.institution} - ${tile_encoder.name}' + description: Embeddings preprocessing for ${dataset.institution} institution with ${tile_encoder.name} tile encoder. \ No newline at end of file diff --git a/configs/preprocessing/tile_encoder/prov_gigapath.yaml b/configs/preprocessing/tile_encoder/prov_gigapath.yaml new file mode 100644 index 0000000..d6389de --- /dev/null +++ b/configs/preprocessing/tile_encoder/prov_gigapath.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.ProvGigaPath + name: Prov-GigaPath diff --git a/configs/preprocessing/tile_encoder/uni2.yaml b/configs/preprocessing/tile_encoder/uni2.yaml new file mode 100644 index 0000000..dd270d4 --- /dev/null +++ b/configs/preprocessing/tile_encoder/uni2.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.UNI2 + name: UNI2 diff --git a/configs/preprocessing/tile_encoder/virchow2.yaml b/configs/preprocessing/tile_encoder/virchow2.yaml new file mode 100644 index 0000000..32c69fe --- /dev/null +++ b/configs/preprocessing/tile_encoder/virchow2.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.Virchow2 + name: Virchow2 From ec4f22212d87fe098725011dfed08855fab723df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 18:41:11 +0000 Subject: [PATCH 06/24] feat: update tiling uris --- configs/data/tiled/ftn/0_320.yaml | 11 +++++++++++ configs/data/tiled/ftn/0_430.yaml | 11 +++++++++++ configs/data/tiled/ftn/1_224.yaml | 11 +++++++++++ configs/data/tiled/ftn/2_224.yaml | 11 +++++++++++ configs/data/tiled/ikem/0_320.yaml | 11 +++++++++++ configs/data/tiled/ikem/0_430.yaml | 11 +++++++++++ configs/data/tiled/ikem/1_224.yaml | 11 +++++++++++ configs/data/tiled/ikem/2_224.yaml | 11 +++++++++++ configs/data/tiled/knl_patos/0_320.yaml | 10 ++++++++++ configs/data/tiled/knl_patos/0_430.yaml | 10 ++++++++++ configs/data/tiled/knl_patos/1_224.yaml | 10 ++++++++++ configs/data/tiled/knl_patos/2_224.yaml | 5 +++-- 12 files changed, 121 insertions(+), 2 deletions(-) create mode 100644 configs/data/tiled/ftn/0_320.yaml create mode 100644 configs/data/tiled/ftn/0_430.yaml create mode 100644 configs/data/tiled/ftn/1_224.yaml create mode 100644 configs/data/tiled/ftn/2_224.yaml create mode 100644 configs/data/tiled/ikem/0_320.yaml create mode 100644 configs/data/tiled/ikem/0_430.yaml create mode 100644 configs/data/tiled/ikem/1_224.yaml create mode 100644 configs/data/tiled/ikem/2_224.yaml create mode 100644 configs/data/tiled/knl_patos/0_320.yaml create mode 100644 configs/data/tiled/knl_patos/0_430.yaml create mode 100644 configs/data/tiled/knl_patos/1_224.yaml diff --git a/configs/data/tiled/ftn/0_320.yaml b/configs/data/tiled/ftn/0_320.yaml new file mode 100644 index 0000000..06afedd --- /dev/null +++ b/configs/data/tiled/ftn/0_320.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +dataset: + institution: ftn + mpp: 0.17 + tile_extent: 320 + level: 0 + uris: + train: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ftn/0_430.yaml b/configs/data/tiled/ftn/0_430.yaml new file mode 100644 index 0000000..2d1f3ff --- /dev/null +++ b/configs/data/tiled/ftn/0_430.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +dataset: + institution: ftn + mpp: 0.17 + tile_extent: 430 + level: 0 + uris: + train: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ftn/1_224.yaml b/configs/data/tiled/ftn/1_224.yaml new file mode 100644 index 0000000..b8904a8 --- /dev/null +++ b/configs/data/tiled/ftn/1_224.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +dataset: + institution: ftn + mpp: 0.52 + tile_extent: 224 + level: 1 + uris: + train: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ftn/2_224.yaml b/configs/data/tiled/ftn/2_224.yaml new file mode 100644 index 0000000..8e89157 --- /dev/null +++ b/configs/data/tiled/ftn/2_224.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +dataset: + institution: ftn + mpp: 1.55 + tile_extent: 224 + level: 2 + uris: + train: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ikem/0_320.yaml b/configs/data/tiled/ikem/0_320.yaml new file mode 100644 index 0000000..7951f25 --- /dev/null +++ b/configs/data/tiled/ikem/0_320.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +dataset: + institution: ikem + mpp: 0.17 + tile_extent: 320 + level: 0 + uris: + train: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ikem/0_430.yaml b/configs/data/tiled/ikem/0_430.yaml new file mode 100644 index 0000000..d63dfe0 --- /dev/null +++ b/configs/data/tiled/ikem/0_430.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +dataset: + institution: ikem + mpp: 0.17 + tile_extent: 430 + level: 0 + uris: + train: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ikem/1_224.yaml b/configs/data/tiled/ikem/1_224.yaml new file mode 100644 index 0000000..467d3a4 --- /dev/null +++ b/configs/data/tiled/ikem/1_224.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +dataset: + institution: ikem + mpp: 0.52 + tile_extent: 224 + level: 1 + uris: + train: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ikem/2_224.yaml b/configs/data/tiled/ikem/2_224.yaml new file mode 100644 index 0000000..0ec13aa --- /dev/null +++ b/configs/data/tiled/ikem/2_224.yaml @@ -0,0 +1,11 @@ +# @package _global_ + +dataset: + institution: ikem + mpp: 1.55 + tile_extent: 224 + level: 2 + uris: + train: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/knl_patos/0_320.yaml b/configs/data/tiled/knl_patos/0_320.yaml new file mode 100644 index 0000000..e2c7d5b --- /dev/null +++ b/configs/data/tiled/knl_patos/0_320.yaml @@ -0,0 +1,10 @@ +# @package _global_ + +dataset: + institution: knl_patos + mpp: 0.17 + tile_extent: 320 + level: 0 + uris: + test_preliminary: "mlflow-artifacts:/86/7b9a446145b14965981bbac88e8e2c8b/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/7b9a446145b14965981bbac88e8e2c8b/artifacts/test final - knl_patos" # TODO update URI diff --git a/configs/data/tiled/knl_patos/0_430.yaml b/configs/data/tiled/knl_patos/0_430.yaml new file mode 100644 index 0000000..dafd755 --- /dev/null +++ b/configs/data/tiled/knl_patos/0_430.yaml @@ -0,0 +1,10 @@ +# @package _global_ + +dataset: + institution: knl_patos + mpp: 0.17 + tile_extent: 430 + level: 0 + uris: + test_preliminary: "mlflow-artifacts:/86/eb29255c944d4dad926160a7cb102ad9/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/eb29255c944d4dad926160a7cb102ad9/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/knl_patos/1_224.yaml b/configs/data/tiled/knl_patos/1_224.yaml new file mode 100644 index 0000000..3c86e8e --- /dev/null +++ b/configs/data/tiled/knl_patos/1_224.yaml @@ -0,0 +1,10 @@ +# @package _global_ + +dataset: + institution: knl_patos + mpp: 0.52 + tile_extent: 224 + level: 1 + uris: + test_preliminary: "mlflow-artifacts:/86/6782155362d54ecc9f1beccb4362d359/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/6782155362d54ecc9f1beccb4362d359/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/knl_patos/2_224.yaml b/configs/data/tiled/knl_patos/2_224.yaml index 50e6f52..3061d80 100644 --- a/configs/data/tiled/knl_patos/2_224.yaml +++ b/configs/data/tiled/knl_patos/2_224.yaml @@ -4,6 +4,7 @@ dataset: institution: knl_patos mpp: 1.55 tile_extent: 224 + level: 2 uris: - - test_preliminary: ??? - - test_final: ??? \ No newline at end of file + test_preliminary: "mlflow-artifacts:/86/d7486bb6b667433989c3ce1c8ce31d60/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/d7486bb6b667433989c3ce1c8ce31d60/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file From 0baa04d03ee3fcde1f692d1bde846e4a6a1c2d00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 18:52:45 +0000 Subject: [PATCH 07/24] feat: script and confs --- .../embeddings/knl_patos_2_virchow2.yaml | 6 ++++++ configs/preprocessing/embeddings.yaml | 2 +- scripts/preprocessing/embeddings.py | 19 +++++++++++++++++++ 3 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml create mode 100644 scripts/preprocessing/embeddings.py diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml new file mode 100644 index 0000000..42a1eee --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/knl_patos/2_224 + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index 19cb5be..70a703a 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -3,7 +3,7 @@ output_dir: ${project_dir}/embeddings/${dataset.institution} dataloader: - batch_size: 2048 + batch_size: 512 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb num_workers: 8 persistent_workers: True diff --git a/scripts/preprocessing/embeddings.py b/scripts/preprocessing/embeddings.py new file mode 100644 index 0000000..44d6624 --- /dev/null +++ b/scripts/preprocessing/embeddings.py @@ -0,0 +1,19 @@ +from kube_jobs import storage, submit_job + + +submit_job( + job_name="ulcerative-colitis-embeddings-...", + username=..., + public=False, + cpu=16, + memory="32Gi", + gpu="H100", + script=[ + "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", + "cd workdir", + "export HF_TOKEN=...", + "uv sync --frozen", + "uv run --active -m preprocessing.embeddings +experiment=...", + ], + storage=[storage.secure.DATA, storage.secure.PROJECTS], +) From d04afab7265835a7d714f5f12413bdb6f7a74961 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 18:59:24 +0000 Subject: [PATCH 08/24] feat: update conf --- configs/preprocessing/embeddings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index 70a703a..d1a3c0b 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -1,6 +1,6 @@ # @package _global_ -output_dir: ${project_dir}/embeddings/${dataset.institution} +output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level} dataloader: batch_size: 512 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb From cafc29ec6cb7f7ed2f4a92607dbb25c19e0c7523 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 19:05:17 +0000 Subject: [PATCH 09/24] fix: datamodule --- ml/data/data_module.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/ml/data/data_module.py b/ml/data/data_module.py index f803ac5..f0690ff 100644 --- a/ml/data/data_module.py +++ b/ml/data/data_module.py @@ -5,7 +5,8 @@ from omegaconf import DictConfig from torch.utils.data import DataLoader -from ml.typing import Input + +# from ml.typing import Input TODO class DataModule(LightningDataModule): @@ -27,7 +28,7 @@ def setup(self, stage: str) -> None: case "test": self.test = instantiate(self.datasets["test"]) - def train_dataloader(self) -> Iterable[Input]: + def train_dataloader(self) -> Iterable: return DataLoader( self.train, batch_size=self.batch_size, @@ -37,7 +38,7 @@ def train_dataloader(self) -> Iterable[Input]: persistent_workers=self.num_workers > 0, ) - def val_dataloader(self) -> Iterable[Input]: + def val_dataloader(self) -> Iterable: return DataLoader( self.val, batch_size=self.batch_size, @@ -45,7 +46,7 @@ def val_dataloader(self) -> Iterable[Input]: persistent_workers=self.num_workers > 0, ) - def test_dataloader(self) -> Iterable[Input]: + def test_dataloader(self) -> Iterable: return DataLoader( self.test, batch_size=self.batch_size, num_workers=self.num_workers ) From 1b32f1d0651725b102a8bf26c01896c1fd5f1ad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 19:24:18 +0000 Subject: [PATCH 10/24] feat: confs --- configs/preprocessing/embeddings.yaml | 11 ++++++++--- configs/preprocessing/tile_encoder/prov_gigapath.yaml | 2 +- configs/preprocessing/tile_encoder/uni2.yaml | 2 +- configs/preprocessing/tile_encoder/virchow2.yaml | 2 +- 4 files changed, 11 insertions(+), 6 deletions(-) diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index d1a3c0b..3e3c66b 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -1,6 +1,6 @@ # @package _global_ -output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level} +output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level}/${tile_encoder.name} dataloader: batch_size: 512 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb @@ -8,5 +8,10 @@ dataloader: persistent_workers: True metadata: - run_name: '📂 Embeddings: ${dataset.institution} - ${tile_encoder.name}' - description: Embeddings preprocessing for ${dataset.institution} institution with ${tile_encoder.name} tile encoder. \ No newline at end of file + run_name: '📂 Embeddings: ${dataset.institution} - ${dataset.level} - ${tile_encoder.name}' + description: Embeddings preprocessing for ${dataset.institution} institution with ${tile_encoder.name} tile encoder. + hyperparams: + tile_encoder: ${tile_encoder.name} + level: ${dataset.level} + batch_size: ${dataloader.batch_size} + num_workers: ${dataloader.num_workers} \ No newline at end of file diff --git a/configs/preprocessing/tile_encoder/prov_gigapath.yaml b/configs/preprocessing/tile_encoder/prov_gigapath.yaml index d6389de..a40583e 100644 --- a/configs/preprocessing/tile_encoder/prov_gigapath.yaml +++ b/configs/preprocessing/tile_encoder/prov_gigapath.yaml @@ -2,4 +2,4 @@ tile_encoder: _target_: preprocessing.embeddings.ProvGigaPath - name: Prov-GigaPath + name: prov_gigapath diff --git a/configs/preprocessing/tile_encoder/uni2.yaml b/configs/preprocessing/tile_encoder/uni2.yaml index dd270d4..9febe23 100644 --- a/configs/preprocessing/tile_encoder/uni2.yaml +++ b/configs/preprocessing/tile_encoder/uni2.yaml @@ -2,4 +2,4 @@ tile_encoder: _target_: preprocessing.embeddings.UNI2 - name: UNI2 + name: uni2 diff --git a/configs/preprocessing/tile_encoder/virchow2.yaml b/configs/preprocessing/tile_encoder/virchow2.yaml index 32c69fe..4952488 100644 --- a/configs/preprocessing/tile_encoder/virchow2.yaml +++ b/configs/preprocessing/tile_encoder/virchow2.yaml @@ -2,4 +2,4 @@ tile_encoder: _target_: preprocessing.embeddings.Virchow2 - name: Virchow2 + name: virchow2 From 9ec51f0c0fe5c4f2b01a330853218d7d0822211e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 19:34:53 +0000 Subject: [PATCH 11/24] fix: typo --- scripts/preprocessing/embeddings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/preprocessing/embeddings.py b/scripts/preprocessing/embeddings.py index 44d6624..f416949 100644 --- a/scripts/preprocessing/embeddings.py +++ b/scripts/preprocessing/embeddings.py @@ -13,7 +13,7 @@ "cd workdir", "export HF_TOKEN=...", "uv sync --frozen", - "uv run --active -m preprocessing.embeddings +experiment=...", + "uv run -m preprocessing.embeddings +experiment=...", ], storage=[storage.secure.DATA, storage.secure.PROJECTS], ) From 06f204fb97b3d5422e593761a038bfa56cf6c349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 23:05:12 +0000 Subject: [PATCH 12/24] feat: conf --- configs/preprocessing/embeddings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index 3e3c66b..bf5abb3 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -3,7 +3,7 @@ output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level}/${tile_encoder.name} dataloader: - batch_size: 512 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb + batch_size: 1024 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb num_workers: 8 persistent_workers: True From 4b039f4828c131af2e9162c3e8a096b593e1fe2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 23:11:20 +0000 Subject: [PATCH 13/24] feat: confs --- .../preprocessing/embeddings/ftn_1_prov_gigapath.yaml | 6 ++++++ configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml | 6 ++++++ .../experiment/preprocessing/embeddings/ftn_1_virchow2.yaml | 6 ++++++ .../experiment/preprocessing/embeddings/ftn_2_virchow2.yaml | 6 ++++++ .../preprocessing/embeddings/ikem_1_prov_gigapath.yaml | 6 ++++++ .../experiment/preprocessing/embeddings/ikem_1_uni2.yaml | 6 ++++++ .../preprocessing/embeddings/ikem_1_virchow2.yaml | 6 ++++++ .../preprocessing/embeddings/ikem_2_virchow2.yaml | 6 ++++++ .../preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml | 6 ++++++ .../preprocessing/embeddings/knl_patos_1_uni2.yaml | 6 ++++++ configs/preprocessing/embeddings.yaml | 2 +- 11 files changed, 61 insertions(+), 1 deletion(-) create mode 100644 configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml create mode 100644 configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml create mode 100644 configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml new file mode 100644 index 0000000..d557582 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ftn/1_224 + - /preprocessing/tile_encoder/prov_gigapath + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml new file mode 100644 index 0000000..6ac3e09 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ftn/1_224 + - /preprocessing/tile_encoder/uni2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml new file mode 100644 index 0000000..5fbdb1f --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ftn/1_224 + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml new file mode 100644 index 0000000..14f36f7 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ftn/2_224 + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml new file mode 100644 index 0000000..f99a434 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ikem/1_224 + - /preprocessing/tile_encoder/prov_gigapath + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml new file mode 100644 index 0000000..90374d1 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ikem/1_224 + - /preprocessing/tile_encoder/uni2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml new file mode 100644 index 0000000..1f570fc --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ikem/1_224 + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml new file mode 100644 index 0000000..175f6ff --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ikem/2_224 + - /preprocessing/tile_encoder/virchow2 + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml new file mode 100644 index 0000000..e954cf4 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/knl_patos/1_224 + - /preprocessing/tile_encoder/prov_gigapath + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml new file mode 100644 index 0000000..62e7713 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/knl_patos/1_224 + - /preprocessing/tile_encoder/uni2 + - _self_ \ No newline at end of file diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index bf5abb3..3e3c66b 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -3,7 +3,7 @@ output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level}/${tile_encoder.name} dataloader: - batch_size: 1024 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb + batch_size: 512 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb num_workers: 8 persistent_workers: True From 1340dbff2ffd8c1ca093a4b4197e3dfbc177ebce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 23:12:01 +0000 Subject: [PATCH 14/24] feat: confs --- configs/preprocessing/embeddings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index 3e3c66b..bf5abb3 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -3,7 +3,7 @@ output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level}/${tile_encoder.name} dataloader: - batch_size: 512 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb + batch_size: 1024 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb num_workers: 8 persistent_workers: True From 70ea97c97dfa26eb06dd977fff599b032c086d87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 23:12:39 +0000 Subject: [PATCH 15/24] feat: confs --- configs/preprocessing/embeddings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index bf5abb3..6dacc98 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -3,7 +3,7 @@ output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level}/${tile_encoder.name} dataloader: - batch_size: 1024 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb + batch_size: 2048 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb num_workers: 8 persistent_workers: True From 9a9cc4d2964528bef5be1f65b6e14f9606ac4181 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 23:13:12 +0000 Subject: [PATCH 16/24] feat: confs --- configs/preprocessing/embeddings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index 6dacc98..bf5abb3 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -3,7 +3,7 @@ output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level}/${tile_encoder.name} dataloader: - batch_size: 2048 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb + batch_size: 1024 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb num_workers: 8 persistent_workers: True From 6c593868cda4de0a2ff454272a0ce8f95b5b0081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Sun, 1 Feb 2026 23:14:49 +0000 Subject: [PATCH 17/24] feat: confs --- configs/preprocessing/embeddings.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/preprocessing/embeddings.yaml b/configs/preprocessing/embeddings.yaml index bf5abb3..6dacc98 100644 --- a/configs/preprocessing/embeddings.yaml +++ b/configs/preprocessing/embeddings.yaml @@ -3,7 +3,7 @@ output_dir: ${project_dir}/embeddings/${dataset.institution}/${dataset.level}/${tile_encoder.name} dataloader: - batch_size: 1024 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb + batch_size: 2048 # 2048 for H100, 1024 for A40, 512 for mig-2g.20gb num_workers: 8 persistent_workers: True From c6ce394d3ef7f2ae1a01a80f07b37f82a9e63a70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Mon, 2 Feb 2026 15:53:02 +0000 Subject: [PATCH 18/24] feat: reusing already generated embeddings --- preprocessing/embeddings.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/preprocessing/embeddings.py b/preprocessing/embeddings.py index 6afc5ff..fdf049c 100644 --- a/preprocessing/embeddings.py +++ b/preprocessing/embeddings.py @@ -154,6 +154,13 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: slide_name = str(slide_dataset.slide_metadata["name"]) embeddings_path = (dest / slide_name).with_suffix(".parquet") + if embeddings_path.exists(): + print(f"Embeddings for slide {slide_name} already exist, skipping...") + logger.log_artifact( + local_path=str(embeddings_path), artifact_path="embeddings" + ) + continue + try: slide_tiles_dataloader = DataLoader( slide_dataset, From 0e625396db1ec6be644948dbfee5c6eb33722a08 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Tue, 3 Feb 2026 16:13:51 +0000 Subject: [PATCH 19/24] feat: artifacts logging at the end --- preprocessing/embeddings.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/preprocessing/embeddings.py b/preprocessing/embeddings.py index fdf049c..a2f23d7 100644 --- a/preprocessing/embeddings.py +++ b/preprocessing/embeddings.py @@ -156,9 +156,9 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: if embeddings_path.exists(): print(f"Embeddings for slide {slide_name} already exist, skipping...") - logger.log_artifact( - local_path=str(embeddings_path), artifact_path="embeddings" - ) + # logger.log_artifact( + # local_path=str(embeddings_path), artifact_path="embeddings" + # ) continue try: @@ -192,12 +192,14 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: embeddings_path, ) - logger.log_artifact( - local_path=str(embeddings_path), artifact_path="embeddings" - ) + # logger.log_artifact( + # local_path=str(embeddings_path), artifact_path="embeddings" + # ) except Exception as e: print(f"Error processing slide {slide_name}: {e}") + logger.log_artifacts(str(dest), artifact_path="embeddings") + if __name__ == "__main__": main() From b394f8284b77f8d3368ba576790859621c80ca65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 18:25:28 +0000 Subject: [PATCH 20/24] fix: typo --- configs/preprocessing.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/preprocessing.yaml b/configs/preprocessing.yaml index 6af2711..08a0372 100644 --- a/configs/preprocessing.yaml +++ b/configs/preprocessing.yaml @@ -6,4 +6,4 @@ defaults: dataset: ??? -project_dir: /mnt/projects/inflammatory_bowel_dissease/ulcerative_colitis +project_dir: /mnt/projects/inflammatory_bowel_disease/ulcerative_colitis From 8826dc141bd3ca0a347ae2137d0aec1ac4c092aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 18:46:03 +0000 Subject: [PATCH 21/24] feat: new found models --- .../preprocessing/embeddings/ftn_1_uni.yaml | 6 +++ .../embeddings/ftn_1_virchow.yaml | 6 +++ .../preprocessing/embeddings/ikem_1_uni.yaml | 6 +++ .../embeddings/ikem_1_virchow.yaml | 6 +++ .../embeddings/knl_patos_1_uni.yaml | 6 +++ .../embeddings/knl_patos_1_virchow.yaml | 6 +++ configs/preprocessing/tile_encoder/uni.yaml | 5 +++ .../preprocessing/tile_encoder/virchow.yaml | 5 +++ preprocessing/embeddings.py | 37 +++++++++++++++++++ 9 files changed, 83 insertions(+) create mode 100644 configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml create mode 100644 configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml create mode 100644 configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml create mode 100644 configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml create mode 100644 configs/preprocessing/tile_encoder/uni.yaml create mode 100644 configs/preprocessing/tile_encoder/virchow.yaml diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml new file mode 100644 index 0000000..47267de --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ftn/1_224 + - /preprocessing/tile_encoder/uni + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml new file mode 100644 index 0000000..7732ae3 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ftn/1_224 + - /preprocessing/tile_encoder/virchow + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml new file mode 100644 index 0000000..ce9605c --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ikem/1_224 + - /preprocessing/tile_encoder/uni + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml new file mode 100644 index 0000000..13d51c1 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/ikem/1_224 + - /preprocessing/tile_encoder/virchow + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml new file mode 100644 index 0000000..ad3340c --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/knl_patos/1_224 + - /preprocessing/tile_encoder/uni + - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml new file mode 100644 index 0000000..b0e8d64 --- /dev/null +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml @@ -0,0 +1,6 @@ +# @package _global_ + +defaults: + - /data/tiled/knl_patos/1_224 + - /preprocessing/tile_encoder/virchow + - _self_ \ No newline at end of file diff --git a/configs/preprocessing/tile_encoder/uni.yaml b/configs/preprocessing/tile_encoder/uni.yaml new file mode 100644 index 0000000..c3db256 --- /dev/null +++ b/configs/preprocessing/tile_encoder/uni.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.UNI + name: uni diff --git a/configs/preprocessing/tile_encoder/virchow.yaml b/configs/preprocessing/tile_encoder/virchow.yaml new file mode 100644 index 0000000..69ff16e --- /dev/null +++ b/configs/preprocessing/tile_encoder/virchow.yaml @@ -0,0 +1,5 @@ +# @package _global_ + +tile_encoder: + _target_: preprocessing.embeddings.Virchow + name: virchow diff --git a/preprocessing/embeddings.py b/preprocessing/embeddings.py index a2f23d7..eb82595 100644 --- a/preprocessing/embeddings.py +++ b/preprocessing/embeddings.py @@ -38,6 +38,26 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return self.module(x) +class Virchow(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 2560) + + # For this, you need to setup HF_TOKEN= env.variable. + self.module = timm.create_model( + "hf-hub:paige-ai/Virchow", + pretrained=True, + mlp_layer=SwiGLUPacked, + act_layer=torch.nn.SiLU, + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + output = self.module(x) + + class_token = output[:, 0] + patch_tokens = output[:, 1:] + return torch.cat([class_token, patch_tokens.mean(1)], dim=-1) + + class Virchow2(FoundationModel): def __init__(self, name: str) -> None: super().__init__(name, 2560) @@ -62,6 +82,23 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return torch.cat([class_token, patch_tokens.mean(1)], dim=-1) # size: B x 2560 +class UNI(FoundationModel): + def __init__(self, name: str) -> None: + super().__init__(name, 1024) + + # For this, you need to setup HF_TOKEN= env.variable. + + self.module = timm.create_model( + "hf-hub:MahmoodLab/uni", + pretrained=True, + init_values=1e-5, + dynamic_img_size=True, + ).eval() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return self.module(x) + + class UNI2(FoundationModel): def __init__(self, name: str) -> None: super().__init__(name, 1536) From 33de3cd66431ac5e44844de1be5b169a380a90f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 21:11:27 +0000 Subject: [PATCH 22/24] feat: confs --- configs/data/tiled/ftn/0_320.yaml | 11 ----------- configs/data/tiled/ftn/0_430.yaml | 11 ----------- configs/data/tiled/ftn/1_224.yaml | 11 ----------- configs/data/tiled/ftn/2_224.yaml | 11 ----------- configs/data/tiled/ikem/0_320.yaml | 11 ----------- configs/data/tiled/ikem/0_430.yaml | 11 ----------- configs/data/tiled/ikem/1_224.yaml | 11 ----------- configs/data/tiled/ikem/2_224.yaml | 11 ----------- configs/data/tiled/knl_patos/0_320.yaml | 10 ---------- configs/data/tiled/knl_patos/0_430.yaml | 10 ---------- configs/data/tiled/knl_patos/1_224.yaml | 10 ---------- configs/data/tiled/knl_patos/2_224.yaml | 10 ---------- configs/dataset/tiled/ftn/0_320.yaml | 11 +++++++++++ configs/dataset/tiled/ftn/0_430.yaml | 11 +++++++++++ configs/dataset/tiled/ftn/1_224.yaml | 11 +++++++++++ configs/dataset/tiled/ftn/2_224.yaml | 11 +++++++++++ configs/dataset/tiled/ikem/0_320.yaml | 11 +++++++++++ configs/dataset/tiled/ikem/0_430.yaml | 11 +++++++++++ configs/dataset/tiled/ikem/1_224.yaml | 11 +++++++++++ configs/dataset/tiled/ikem/2_224.yaml | 11 +++++++++++ configs/dataset/tiled/knl_patos/0_320.yaml | 10 ++++++++++ configs/dataset/tiled/knl_patos/0_430.yaml | 10 ++++++++++ configs/dataset/tiled/knl_patos/1_224.yaml | 10 ++++++++++ configs/dataset/tiled/knl_patos/2_224.yaml | 10 ++++++++++ .../preprocessing/embeddings/ftn_1_prov_gigapath.yaml | 2 +- .../preprocessing/embeddings/ftn_1_uni.yaml | 2 +- .../preprocessing/embeddings/ftn_1_uni2.yaml | 2 +- .../preprocessing/embeddings/ftn_1_virchow.yaml | 2 +- .../preprocessing/embeddings/ftn_1_virchow2.yaml | 2 +- .../preprocessing/embeddings/ftn_2_virchow2.yaml | 2 +- .../embeddings/ikem_1_prov_gigapath.yaml | 2 +- .../preprocessing/embeddings/ikem_1_uni.yaml | 2 +- .../preprocessing/embeddings/ikem_1_uni2.yaml | 2 +- .../preprocessing/embeddings/ikem_1_virchow.yaml | 2 +- .../preprocessing/embeddings/ikem_1_virchow2.yaml | 2 +- .../preprocessing/embeddings/ikem_2_virchow2.yaml | 2 +- .../embeddings/knl_patos_1_prov_gigapath.yaml | 2 +- .../preprocessing/embeddings/knl_patos_1_uni.yaml | 2 +- .../preprocessing/embeddings/knl_patos_1_uni2.yaml | 2 +- .../preprocessing/embeddings/knl_patos_1_virchow.yaml | 2 +- .../embeddings/knl_patos_1_virchow2.yaml | 2 +- .../embeddings/knl_patos_2_virchow2.yaml | 2 +- preprocessing/embeddings.py | 2 +- scripts/preprocessing/embeddings.py | 2 +- 44 files changed, 148 insertions(+), 148 deletions(-) delete mode 100644 configs/data/tiled/ftn/0_320.yaml delete mode 100644 configs/data/tiled/ftn/0_430.yaml delete mode 100644 configs/data/tiled/ftn/1_224.yaml delete mode 100644 configs/data/tiled/ftn/2_224.yaml delete mode 100644 configs/data/tiled/ikem/0_320.yaml delete mode 100644 configs/data/tiled/ikem/0_430.yaml delete mode 100644 configs/data/tiled/ikem/1_224.yaml delete mode 100644 configs/data/tiled/ikem/2_224.yaml delete mode 100644 configs/data/tiled/knl_patos/0_320.yaml delete mode 100644 configs/data/tiled/knl_patos/0_430.yaml delete mode 100644 configs/data/tiled/knl_patos/1_224.yaml delete mode 100644 configs/data/tiled/knl_patos/2_224.yaml create mode 100644 configs/dataset/tiled/ftn/0_320.yaml create mode 100644 configs/dataset/tiled/ftn/0_430.yaml create mode 100644 configs/dataset/tiled/ftn/1_224.yaml create mode 100644 configs/dataset/tiled/ftn/2_224.yaml create mode 100644 configs/dataset/tiled/ikem/0_320.yaml create mode 100644 configs/dataset/tiled/ikem/0_430.yaml create mode 100644 configs/dataset/tiled/ikem/1_224.yaml create mode 100644 configs/dataset/tiled/ikem/2_224.yaml create mode 100644 configs/dataset/tiled/knl_patos/0_320.yaml create mode 100644 configs/dataset/tiled/knl_patos/0_430.yaml create mode 100644 configs/dataset/tiled/knl_patos/1_224.yaml create mode 100644 configs/dataset/tiled/knl_patos/2_224.yaml diff --git a/configs/data/tiled/ftn/0_320.yaml b/configs/data/tiled/ftn/0_320.yaml deleted file mode 100644 index 06afedd..0000000 --- a/configs/data/tiled/ftn/0_320.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# @package _global_ - -dataset: - institution: ftn - mpp: 0.17 - tile_extent: 320 - level: 0 - uris: - train: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/train - ftn" # TODO update URI - test_preliminary: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/test preliminary - ftn" # TODO update URI - test_final: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ftn/0_430.yaml b/configs/data/tiled/ftn/0_430.yaml deleted file mode 100644 index 2d1f3ff..0000000 --- a/configs/data/tiled/ftn/0_430.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# @package _global_ - -dataset: - institution: ftn - mpp: 0.17 - tile_extent: 430 - level: 0 - uris: - train: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/train - ftn" # TODO update URI - test_preliminary: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/test preliminary - ftn" # TODO update URI - test_final: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ftn/1_224.yaml b/configs/data/tiled/ftn/1_224.yaml deleted file mode 100644 index b8904a8..0000000 --- a/configs/data/tiled/ftn/1_224.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# @package _global_ - -dataset: - institution: ftn - mpp: 0.52 - tile_extent: 224 - level: 1 - uris: - train: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/train - ftn" # TODO update URI - test_preliminary: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/test preliminary - ftn" # TODO update URI - test_final: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ftn/2_224.yaml b/configs/data/tiled/ftn/2_224.yaml deleted file mode 100644 index 8e89157..0000000 --- a/configs/data/tiled/ftn/2_224.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# @package _global_ - -dataset: - institution: ftn - mpp: 1.55 - tile_extent: 224 - level: 2 - uris: - train: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/train - ftn" # TODO update URI - test_preliminary: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/test preliminary - ftn" # TODO update URI - test_final: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ikem/0_320.yaml b/configs/data/tiled/ikem/0_320.yaml deleted file mode 100644 index 7951f25..0000000 --- a/configs/data/tiled/ikem/0_320.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# @package _global_ - -dataset: - institution: ikem - mpp: 0.17 - tile_extent: 320 - level: 0 - uris: - train: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/train - ikem" # TODO update URI - test_preliminary: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/test preliminary - ikem" # TODO update URI - test_final: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ikem/0_430.yaml b/configs/data/tiled/ikem/0_430.yaml deleted file mode 100644 index d63dfe0..0000000 --- a/configs/data/tiled/ikem/0_430.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# @package _global_ - -dataset: - institution: ikem - mpp: 0.17 - tile_extent: 430 - level: 0 - uris: - train: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/train - ikem" # TODO update URI - test_preliminary: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/test preliminary - ikem" # TODO update URI - test_final: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ikem/1_224.yaml b/configs/data/tiled/ikem/1_224.yaml deleted file mode 100644 index 467d3a4..0000000 --- a/configs/data/tiled/ikem/1_224.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# @package _global_ - -dataset: - institution: ikem - mpp: 0.52 - tile_extent: 224 - level: 1 - uris: - train: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/train - ikem" # TODO update URI - test_preliminary: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/test preliminary - ikem" # TODO update URI - test_final: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/ikem/2_224.yaml b/configs/data/tiled/ikem/2_224.yaml deleted file mode 100644 index 0ec13aa..0000000 --- a/configs/data/tiled/ikem/2_224.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# @package _global_ - -dataset: - institution: ikem - mpp: 1.55 - tile_extent: 224 - level: 2 - uris: - train: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/train - ikem" # TODO update URI - test_preliminary: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/test preliminary - ikem" # TODO update URI - test_final: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/knl_patos/0_320.yaml b/configs/data/tiled/knl_patos/0_320.yaml deleted file mode 100644 index e2c7d5b..0000000 --- a/configs/data/tiled/knl_patos/0_320.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# @package _global_ - -dataset: - institution: knl_patos - mpp: 0.17 - tile_extent: 320 - level: 0 - uris: - test_preliminary: "mlflow-artifacts:/86/7b9a446145b14965981bbac88e8e2c8b/artifacts/test preliminary - knl_patos" # TODO update URI - test_final: "mlflow-artifacts:/86/7b9a446145b14965981bbac88e8e2c8b/artifacts/test final - knl_patos" # TODO update URI diff --git a/configs/data/tiled/knl_patos/0_430.yaml b/configs/data/tiled/knl_patos/0_430.yaml deleted file mode 100644 index dafd755..0000000 --- a/configs/data/tiled/knl_patos/0_430.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# @package _global_ - -dataset: - institution: knl_patos - mpp: 0.17 - tile_extent: 430 - level: 0 - uris: - test_preliminary: "mlflow-artifacts:/86/eb29255c944d4dad926160a7cb102ad9/artifacts/test preliminary - knl_patos" # TODO update URI - test_final: "mlflow-artifacts:/86/eb29255c944d4dad926160a7cb102ad9/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/knl_patos/1_224.yaml b/configs/data/tiled/knl_patos/1_224.yaml deleted file mode 100644 index 3c86e8e..0000000 --- a/configs/data/tiled/knl_patos/1_224.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# @package _global_ - -dataset: - institution: knl_patos - mpp: 0.52 - tile_extent: 224 - level: 1 - uris: - test_preliminary: "mlflow-artifacts:/86/6782155362d54ecc9f1beccb4362d359/artifacts/test preliminary - knl_patos" # TODO update URI - test_final: "mlflow-artifacts:/86/6782155362d54ecc9f1beccb4362d359/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/data/tiled/knl_patos/2_224.yaml b/configs/data/tiled/knl_patos/2_224.yaml deleted file mode 100644 index 3061d80..0000000 --- a/configs/data/tiled/knl_patos/2_224.yaml +++ /dev/null @@ -1,10 +0,0 @@ -# @package _global_ - -dataset: - institution: knl_patos - mpp: 1.55 - tile_extent: 224 - level: 2 - uris: - test_preliminary: "mlflow-artifacts:/86/d7486bb6b667433989c3ce1c8ce31d60/artifacts/test preliminary - knl_patos" # TODO update URI - test_final: "mlflow-artifacts:/86/d7486bb6b667433989c3ce1c8ce31d60/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ftn/0_320.yaml b/configs/dataset/tiled/ftn/0_320.yaml new file mode 100644 index 0000000..ff91a77 --- /dev/null +++ b/configs/dataset/tiled/ftn/0_320.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ftn@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 320 +level: 0 +tiling_uris: + train: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/bbbe4603bc30495d85ac99093fc9269a/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ftn/0_430.yaml b/configs/dataset/tiled/ftn/0_430.yaml new file mode 100644 index 0000000..79b9c8e --- /dev/null +++ b/configs/dataset/tiled/ftn/0_430.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ftn@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 430 +level: 0 +tiling_uris: + train: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/de450f835f0d4462a91b35f4a79a500f/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ftn/1_224.yaml b/configs/dataset/tiled/ftn/1_224.yaml new file mode 100644 index 0000000..91a27da --- /dev/null +++ b/configs/dataset/tiled/ftn/1_224.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ftn@_here_ + - _self_ + +mpp: 0.52 +tile_extent: 224 +level: 1 +tiling_uris: + train: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/f85b64a7f96c41e38f86d84956e2dbe9/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ftn/2_224.yaml b/configs/dataset/tiled/ftn/2_224.yaml new file mode 100644 index 0000000..55da441 --- /dev/null +++ b/configs/dataset/tiled/ftn/2_224.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ftn@_here_ + - _self_ + +mpp: 1.55 +tile_extent: 224 +level: 2 +tiling_uris: + train: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/train - ftn" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/test preliminary - ftn" # TODO update URI + test_final: "mlflow-artifacts:/86/5814484b6cd7467e9d712889655479af/artifacts/test final - ftn" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ikem/0_320.yaml b/configs/dataset/tiled/ikem/0_320.yaml new file mode 100644 index 0000000..065bca1 --- /dev/null +++ b/configs/dataset/tiled/ikem/0_320.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ikem@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 320 +level: 0 +tiling_uris: + train: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/4486e598446d412d926ac66dadb35e51/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ikem/0_430.yaml b/configs/dataset/tiled/ikem/0_430.yaml new file mode 100644 index 0000000..98c8ff1 --- /dev/null +++ b/configs/dataset/tiled/ikem/0_430.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ikem@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 430 +level: 0 +tiling_uris: + train: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/fd112e63819c49d999502542b35bfce1/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ikem/1_224.yaml b/configs/dataset/tiled/ikem/1_224.yaml new file mode 100644 index 0000000..0b40c0e --- /dev/null +++ b/configs/dataset/tiled/ikem/1_224.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ikem@_here_ + - _self_ + +mpp: 0.52 +tile_extent: 224 +level: 1 +tiling_uris: + train: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/ece822e7c0e3416f97212267c773c8ac/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/ikem/2_224.yaml b/configs/dataset/tiled/ikem/2_224.yaml new file mode 100644 index 0000000..c68e542 --- /dev/null +++ b/configs/dataset/tiled/ikem/2_224.yaml @@ -0,0 +1,11 @@ +defaults: + - /dataset/processed_w_masks/ikem@_here_ + - _self_ + +mpp: 1.55 +tile_extent: 224 +level: 2 +tiling_uris: + train: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/train - ikem" # TODO update URI + test_preliminary: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/test preliminary - ikem" # TODO update URI + test_final: "mlflow-artifacts:/86/0c09e1c61d294fa3877b6b21703bab2f/artifacts/test final - ikem" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/knl_patos/0_320.yaml b/configs/dataset/tiled/knl_patos/0_320.yaml new file mode 100644 index 0000000..a84731d --- /dev/null +++ b/configs/dataset/tiled/knl_patos/0_320.yaml @@ -0,0 +1,10 @@ +defaults: + - /dataset/processed_w_masks/knl_patos@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 320 +level: 0 +tiling_uris: + test_preliminary: "mlflow-artifacts:/86/7b9a446145b14965981bbac88e8e2c8b/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/7b9a446145b14965981bbac88e8e2c8b/artifacts/test final - knl_patos" # TODO update URI diff --git a/configs/dataset/tiled/knl_patos/0_430.yaml b/configs/dataset/tiled/knl_patos/0_430.yaml new file mode 100644 index 0000000..0e6bb2c --- /dev/null +++ b/configs/dataset/tiled/knl_patos/0_430.yaml @@ -0,0 +1,10 @@ +defaults: + - /dataset/processed_w_masks/knl_patos@_here_ + - _self_ + +mpp: 0.17 +tile_extent: 430 +level: 0 +tiling_uris: + test_preliminary: "mlflow-artifacts:/86/eb29255c944d4dad926160a7cb102ad9/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/eb29255c944d4dad926160a7cb102ad9/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/knl_patos/1_224.yaml b/configs/dataset/tiled/knl_patos/1_224.yaml new file mode 100644 index 0000000..71e47d9 --- /dev/null +++ b/configs/dataset/tiled/knl_patos/1_224.yaml @@ -0,0 +1,10 @@ +defaults: + - /dataset/processed_w_masks/knl_patos@_here_ + - _self_ + +mpp: 0.52 +tile_extent: 224 +level: 1 +tiling_uris: + test_preliminary: "mlflow-artifacts:/86/6782155362d54ecc9f1beccb4362d359/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/6782155362d54ecc9f1beccb4362d359/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/dataset/tiled/knl_patos/2_224.yaml b/configs/dataset/tiled/knl_patos/2_224.yaml new file mode 100644 index 0000000..b814d37 --- /dev/null +++ b/configs/dataset/tiled/knl_patos/2_224.yaml @@ -0,0 +1,10 @@ +defaults: + - /dataset/processed_w_masks/knl_patos@_here_ + - _self_ + +mpp: 1.55 +tile_extent: 224 +level: 2 +tiling_uris: + test_preliminary: "mlflow-artifacts:/86/d7486bb6b667433989c3ce1c8ce31d60/artifacts/test preliminary - knl_patos" # TODO update URI + test_final: "mlflow-artifacts:/86/d7486bb6b667433989c3ce1c8ce31d60/artifacts/test final - knl_patos" # TODO update URI \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml index d557582..ae09076 100644 --- a/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml +++ b/configs/experiment/preprocessing/embeddings/ftn_1_prov_gigapath.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ftn/1_224 + - /dataset/tiled/ftn/1_224@dataset - /preprocessing/tile_encoder/prov_gigapath - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml index 47267de..aa5eddb 100644 --- a/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml +++ b/configs/experiment/preprocessing/embeddings/ftn_1_uni.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ftn/1_224 + - /dataset/tiled/ftn/1_224@dataset - /preprocessing/tile_encoder/uni - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml index 6ac3e09..d212ea2 100644 --- a/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml +++ b/configs/experiment/preprocessing/embeddings/ftn_1_uni2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ftn/1_224 + - /dataset/tiled/ftn/1_224@dataset - /preprocessing/tile_encoder/uni2 - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml index 7732ae3..76c9ea9 100644 --- a/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml +++ b/configs/experiment/preprocessing/embeddings/ftn_1_virchow.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ftn/1_224 + - /dataset/tiled/ftn/1_224@dataset - /preprocessing/tile_encoder/virchow - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml index 5fbdb1f..02e1b1b 100644 --- a/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml +++ b/configs/experiment/preprocessing/embeddings/ftn_1_virchow2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ftn/1_224 + - /dataset/tiled/ftn/1_224@dataset - /preprocessing/tile_encoder/virchow2 - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml index 14f36f7..f88b565 100644 --- a/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml +++ b/configs/experiment/preprocessing/embeddings/ftn_2_virchow2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ftn/2_224 + - /dataset/tiled/ftn/2_224@dataset - /preprocessing/tile_encoder/virchow2 - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml index f99a434..4343f54 100644 --- a/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml +++ b/configs/experiment/preprocessing/embeddings/ikem_1_prov_gigapath.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ikem/1_224 + - /dataset/tiled/ikem/1_224@dataset - /preprocessing/tile_encoder/prov_gigapath - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml index ce9605c..0687e5d 100644 --- a/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml +++ b/configs/experiment/preprocessing/embeddings/ikem_1_uni.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ikem/1_224 + - /dataset/tiled/ikem/1_224@dataset - /preprocessing/tile_encoder/uni - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml index 90374d1..0aed9a5 100644 --- a/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml +++ b/configs/experiment/preprocessing/embeddings/ikem_1_uni2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ikem/1_224 + - /dataset/tiled/ikem/1_224@dataset - /preprocessing/tile_encoder/uni2 - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml index 13d51c1..9d1c211 100644 --- a/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml +++ b/configs/experiment/preprocessing/embeddings/ikem_1_virchow.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ikem/1_224 + - /dataset/tiled/ikem/1_224@dataset - /preprocessing/tile_encoder/virchow - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml index 1f570fc..e4ddf27 100644 --- a/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml +++ b/configs/experiment/preprocessing/embeddings/ikem_1_virchow2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ikem/1_224 + - /dataset/tiled/ikem/1_224@dataset - /preprocessing/tile_encoder/virchow2 - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml index 175f6ff..d43c5dd 100644 --- a/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml +++ b/configs/experiment/preprocessing/embeddings/ikem_2_virchow2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/ikem/2_224 + - /dataset/tiled/ikem/2_224@dataset - /preprocessing/tile_encoder/virchow2 - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml index e954cf4..66c2fcd 100644 --- a/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_prov_gigapath.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/knl_patos/1_224 + - /dataset/tiled/knl_patos/1_224@dataset - /preprocessing/tile_encoder/prov_gigapath - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml index ad3340c..f3730ac 100644 --- a/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/knl_patos/1_224 + - /dataset/tiled/knl_patos/1_224@dataset - /preprocessing/tile_encoder/uni - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml index 62e7713..ae236ab 100644 --- a/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_uni2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/knl_patos/1_224 + - /dataset/tiled/knl_patos/1_224@dataset - /preprocessing/tile_encoder/uni2 - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml index b0e8d64..67688a1 100644 --- a/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/knl_patos/1_224 + - /dataset/tiled/knl_patos/1_224@dataset - /preprocessing/tile_encoder/virchow - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml index f3d82f2..a8cc0ab 100644 --- a/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml +++ b/configs/experiment/preprocessing/embeddings/knl_patos_1_virchow2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/knl_patos/1_224 + - /dataset/tiled/knl_patos/1_224@dataset - /preprocessing/tile_encoder/virchow2 - _self_ \ No newline at end of file diff --git a/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml b/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml index 42a1eee..8e3dcde 100644 --- a/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml +++ b/configs/experiment/preprocessing/embeddings/knl_patos_2_virchow2.yaml @@ -1,6 +1,6 @@ # @package _global_ defaults: - - /data/tiled/knl_patos/2_224 + - /dataset/tiled/knl_patos/2_224@dataset - /preprocessing/tile_encoder/virchow2 - _self_ \ No newline at end of file diff --git a/preprocessing/embeddings.py b/preprocessing/embeddings.py index eb82595..6745c67 100644 --- a/preprocessing/embeddings.py +++ b/preprocessing/embeddings.py @@ -185,7 +185,7 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: tile_encoder = tile_encoder.to(device) with torch.no_grad(): - dataset = load_dataset(config.dataset.uris.values()) + dataset = load_dataset(config.dataset.tiling_uris.values()) for slide_dataset in tqdm(dataset.generate_datasets()): slide_name = str(slide_dataset.slide_metadata["name"]) diff --git a/scripts/preprocessing/embeddings.py b/scripts/preprocessing/embeddings.py index f416949..d066178 100644 --- a/scripts/preprocessing/embeddings.py +++ b/scripts/preprocessing/embeddings.py @@ -9,7 +9,7 @@ memory="32Gi", gpu="H100", script=[ - "git clone https://gitlab.ics.muni.cz/rationai/digital-pathology/pathology/ulcerative-colitis.git workdir", + "git clone https://github.com/RationAI/ulcerative-colitis.git workdir", "cd workdir", "export HF_TOKEN=...", "uv sync --frozen", From 8b1553f1e0f206bef17434c6216a78c7c9bf3bf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 21:23:03 +0000 Subject: [PATCH 23/24] chore: remove comments --- preprocessing/embeddings.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/preprocessing/embeddings.py b/preprocessing/embeddings.py index 6745c67..6d9a58e 100644 --- a/preprocessing/embeddings.py +++ b/preprocessing/embeddings.py @@ -193,9 +193,6 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: if embeddings_path.exists(): print(f"Embeddings for slide {slide_name} already exist, skipping...") - # logger.log_artifact( - # local_path=str(embeddings_path), artifact_path="embeddings" - # ) continue try: @@ -229,9 +226,6 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: embeddings_path, ) - # logger.log_artifact( - # local_path=str(embeddings_path), artifact_path="embeddings" - # ) except Exception as e: print(f"Error processing slide {slide_name}: {e}") From a3fdb24c60692044db5bbea248b4924b6cabc349 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adam=20Kuku=C4=8Dka?= Date: Thu, 12 Feb 2026 22:16:29 +0000 Subject: [PATCH 24/24] fix: PR --- ml/data/datasets/labels.py | 1 + preprocessing/embeddings.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/ml/data/datasets/labels.py b/ml/data/datasets/labels.py index df3a270..67b3509 100644 --- a/ml/data/datasets/labels.py +++ b/ml/data/datasets/labels.py @@ -16,6 +16,7 @@ class LabelMode(Enum): def process_slides(slides: pd.DataFrame, mode: LabelMode | None = None) -> pd.DataFrame: + slides = slides.copy() match mode: case LabelMode.NEUTROPHILS: slides["neutrophils"] = slides["nancy_index"] >= 2 diff --git a/preprocessing/embeddings.py b/preprocessing/embeddings.py index 6d9a58e..b1fe722 100644 --- a/preprocessing/embeddings.py +++ b/preprocessing/embeddings.py @@ -161,8 +161,6 @@ def save_embeddings( slide_tiles_y (torch.Tensor): The y-coordinates of the tiles. embeddings_path (Path): The path to save the embeddings to. """ - embeddings_path.parent.mkdir(parents=True, exist_ok=True) - df = pd.DataFrame( { "x": slide_tiles_x.numpy(), @@ -178,8 +176,14 @@ def save_embeddings( @hydra.main(config_path="../configs", config_name="preprocessing", version_base=None) @autolog def main(config: DictConfig, logger: MLFlowLogger) -> None: - login(token=os.environ["HF_TOKEN"]) + hf_token = os.getenv("HF_TOKEN") + if not hf_token: + raise ValueError( + "Hugging Face token not found. Please set the HF_TOKEN environment variable." + ) + login(token=hf_token) dest = Path(config.output_dir) + dest.mkdir(parents=True, exist_ok=True) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") tile_encoder: FoundationModel = hydra.utils.instantiate(config.tile_encoder) tile_encoder = tile_encoder.to(device) @@ -200,7 +204,8 @@ def main(config: DictConfig, logger: MLFlowLogger) -> None: slide_dataset, batch_size=config.dataloader.batch_size, num_workers=config.dataloader.num_workers, - persistent_workers=config.dataloader.persistent_workers, + persistent_workers=config.dataloader.persistent_workers + and config.dataloader.num_workers > 0, ) slide_tiles_embeddings = torch.zeros( (len(slide_dataset), tile_encoder.embed_dim), dtype=torch.float32