diff --git a/.gitignore b/.gitignore index 0a19790..f813099 100644 --- a/.gitignore +++ b/.gitignore @@ -172,3 +172,5 @@ cython_debug/ # PyPI configuration file .pypirc +zoo/* +*.pickle diff --git a/.vscode/launch.json b/.vscode/launch.json index 975030d..5ebfb66 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,12 +5,17 @@ "version": "0.2.0", "configurations": [ { - "name": "Python: Current File", + "name": "SSYA debug", "type": "debugpy", "request": "launch", - "program": "${file}", + "program": "${workspaceFolder}/ssya/main.py", "cwd": "${workspaceFolder}", "console": "integratedTerminal", + "args": [ + "-i", + "${workspaceFolder}/images", + ], + , "justMyCode": false, }, ], diff --git a/README.md b/README.md index ab8c655..8d80861 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,43 @@ -# Template : How to start and customize? +# ssya -- [ ] Create new repository from this template -- [ ] Inside pyproject.toml rename `package_name` -- [ ] Rename aisp_template directory to `package_name` -- [ ] Update `README.md` +SSYA to graficzne narzędzie do segmentacji i wyszukiwania podobnych regionów w zbiorach obrazów, wykorzystujące Segment Anything v2 (SAM2). -# Template directory structure +## Funkcje +- Offline’owe indeksowanie i cache’owanie embeddingów z SAM2 +- Interaktywne GUI z paskami postępu i filtrowaniem według progu podobieństwa +- Szybkie wyszukiwanie podobnych wykryć -- package_name/ - Insert package code here -- tests/ - Insert unit tests here -- scripts/ - Insert scripts here -- images/ - If this is CV/AI repository then insert images here - -# Package name - -Write package short description here. - -# Installation : Developer - -Use poetry to install the package in development mode. +## Wymagania +- Python 3.11+ +## Instalacja ```bash -git clone {URL} -uv sync -uv venv +git clone +cd ssya +pdm install ``` -# Testing - -Run the tests using pytest. - +## Użycie ```bash -uv run pytest +ssya -i /ścieżka/do/dataset ``` +Jeśli nie podasz `-i`, pojawi się okno dialogowe do wyboru folderu ze zbiorami. -# Release +## Format danych +Umieść w katalogu obrazy i pliki anotacji TXT (jedna linia na obiekt: `klasa xc yc szerokość wysokość`, wartości znormalizowane). -Github workflow is created to automatically release the package to PyPI when a new tag "vX.X.X" (example v1.0.0) is pushed to the main branch. +## Testy +```bash +pdm run pytest +``` +## Wydania +Nowe tagi `vX.X.X` wrzucane na `main` są automatycznie publikowane na PyPI. +Możesz też ręcznie: ```bash git tag vX.X.X git push --tags -``` - -Or manually build and upload the package to PyPI using the following command. - -``` -uv build +pdm build +pdm publish ``` diff --git a/images/01101362e06fd5720647eeba5922bf3cb432f4ca.png b/images/01101362e06fd5720647eeba5922bf3cb432f4ca.png new file mode 100644 index 0000000..13e3900 Binary files /dev/null and b/images/01101362e06fd5720647eeba5922bf3cb432f4ca.png differ diff --git a/images/01101362e06fd5720647eeba5922bf3cb432f4ca.txt b/images/01101362e06fd5720647eeba5922bf3cb432f4ca.txt new file mode 100644 index 0000000..e69de29 diff --git a/images/192e2ab2dd07dfe002586af2df6fd5ad775d0073.jpg b/images/192e2ab2dd07dfe002586af2df6fd5ad775d0073.jpg new file mode 100644 index 0000000..3de5f53 Binary files /dev/null and b/images/192e2ab2dd07dfe002586af2df6fd5ad775d0073.jpg differ diff --git a/images/414b2c60d4399fd180320d42e7c35b2514b2e0fb.JPG b/images/414b2c60d4399fd180320d42e7c35b2514b2e0fb.JPG new file mode 100644 index 0000000..cfaa4fb Binary files /dev/null and b/images/414b2c60d4399fd180320d42e7c35b2514b2e0fb.JPG differ diff --git a/images/414b2c60d4399fd180320d42e7c35b2514b2e0fb.txt b/images/414b2c60d4399fd180320d42e7c35b2514b2e0fb.txt new file mode 100644 index 0000000..a2c5e60 --- /dev/null +++ b/images/414b2c60d4399fd180320d42e7c35b2514b2e0fb.txt @@ -0,0 +1 @@ +3 0.493349 0.459251 0.911729 0.442731 diff --git a/images/7e954dc0a66a0659163da55410280574388f137d.jpg b/images/7e954dc0a66a0659163da55410280574388f137d.jpg new file mode 100644 index 0000000..9dbc16e Binary files /dev/null and b/images/7e954dc0a66a0659163da55410280574388f137d.jpg differ diff --git a/images/7e954dc0a66a0659163da55410280574388f137d.txt b/images/7e954dc0a66a0659163da55410280574388f137d.txt new file mode 100644 index 0000000..312cff4 --- /dev/null +++ b/images/7e954dc0a66a0659163da55410280574388f137d.txt @@ -0,0 +1,2 @@ +2 0.410937 0.560937 0.268750 0.288542 +2 0.721484 0.518750 0.228906 0.179167 diff --git a/images/8f4de79e2f402d169ca9d902d8bbd45be34a6361.png b/images/8f4de79e2f402d169ca9d902d8bbd45be34a6361.png new file mode 100644 index 0000000..0c61cb7 Binary files /dev/null and b/images/8f4de79e2f402d169ca9d902d8bbd45be34a6361.png differ diff --git a/images/8f4de79e2f402d169ca9d902d8bbd45be34a6361.txt b/images/8f4de79e2f402d169ca9d902d8bbd45be34a6361.txt new file mode 100644 index 0000000..c340b17 --- /dev/null +++ b/images/8f4de79e2f402d169ca9d902d8bbd45be34a6361.txt @@ -0,0 +1,7 @@ +6 0.415104 0.306019 0.545833 0.350926 +2 0.670052 0.399537 0.165104 0.191667 +2 0.926042 0.181019 0.020833 0.036111 +2 0.685937 0.126389 0.022917 0.025000 +2 0.890365 0.175000 0.021354 0.024074 +2 0.865104 0.165278 0.019792 0.019444 +14 0.608333 0.418981 0.016667 0.021296 diff --git a/images/image.png b/images/image.png new file mode 100644 index 0000000..60bfd49 Binary files /dev/null and b/images/image.png differ diff --git a/images/image.txt b/images/image.txt new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index 79c2bf8..ea22cb1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ dynamic = ["version"] dependencies = [ "dotenv>=0.9.9", + "sam2>=1.1.0", "yaya-tools", ] diff --git a/ssya/controllers/dataset_manager.py b/ssya/controllers/dataset_manager.py new file mode 100644 index 0000000..d0e400f --- /dev/null +++ b/ssya/controllers/dataset_manager.py @@ -0,0 +1,77 @@ +import logging +from pathlib import Path + +import cv2 +import numpy as np +from tqdm import tqdm +from yaya_tools.helpers.dataset import load_directory_images_annotatations + +from ssya.controllers.features_index import FeatureIndex +from ssya.controllers.sam2_wrapper import Sam2Runner +from ssya.models.detection import Detection + +logger = logging.getLogger(__name__) + + +class DatasetManager: + """Loads dataset, detections, builds/loads feature index.""" + + def __init__(self, root: Path): + self.root = root + ann_map = load_directory_images_annotatations(str(root)) + self.images: list[str] = list(ann_map.keys()) + self.ann_map = ann_map + self.detections: dict[str, list[Detection]] = {} + for img_idx, img_path in enumerate(self.images): + if not ann_map[img_path]: + self.detections[img_path] = [] + continue + with open(root / ann_map[img_path]) as f: + lines = [l.split() for l in f] + self.detections[img_path] = [ + Detection(int(cls), (float(xc), float(yc), float(w), float(h)), img_idx) for cls, xc, yc, w, h in lines + ] + logger.info("Dataset: %d images (%d with annotations)", len(self.images), len(self.detections)) + + # Build or load feature index --------------------------------- + self.index_path = root / "features.pickle" + if self.index_path.exists(): + logger.info("Loading cached features …") + self.fidx = FeatureIndex.load(self.index_path) + else: + self.fidx = FeatureIndex() + self._build_index() + self.fidx.save(self.index_path) + + # Detections : Update with embeddings from the index + for img_path, dets in self.detections.items(): + self.detections[img_path] = self.fidx.get_features(dets) + + # ------------------------------------------------------------------ + + def _build_index(self) -> None: + sam = Sam2Runner() + logger.info("Building feature index (SAM2)…") + for img_idx, img_path in enumerate(tqdm(self.images, desc="Images")): + img = cv2.imread(str(self.root / img_path)) + if img is None: + continue + for det_idx, det in enumerate(self.detections[img_path]): + mask, emb = sam.mask_and_embed(img, det.bbox_pixels(img.shape[1], img.shape[0])) + det.embedding = emb + self.fidx.add(img_idx, det_idx, emb) + + # ------------------------------------------------------------------ + + # Convenience helpers used by GUI ---------------------------------- + def image(self, idx: int) -> np.ndarray: + """Get image at index `idx`.""" + return cv2.imread(str(self.root / self.images[idx])) + + def image_detections(self, idx: int) -> list[Detection]: + """Get detections for the image at index `idx`.""" + return self.detections[self.images[idx]] + + def image_count(self) -> int: + """Get the number of images in the dataset.""" + return len(self.images) diff --git a/ssya/controllers/features_index.py b/ssya/controllers/features_index.py new file mode 100644 index 0000000..0c021e5 --- /dev/null +++ b/ssya/controllers/features_index.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import logging +import pickle +from pathlib import Path +from typing import Any + +import numpy as np + +from ssya.helpers.metrics import cosine_similarity +from ssya.models.detection import Detection # type: ignore + +logger = logging.getLogger(__name__) + + +class FeatureIndex: + """Persistent RAM index: list of (image_idx, det_idx, embedding).""" + + def __init__(self, entries: list[dict[str, Any]] | None = None): + """Initialize with existing entries or empty.""" + if entries is not None: + self.entries = entries + else: + self.entries: list[dict[str, Any]] = [] + + def add(self, image_idx: int, det_idx: int, emb: np.ndarray): + self.entries.append({"image_idx": image_idx, "det_idx": det_idx, "emb": emb}) + + def save(self, path: Path): + with open(path, "wb") as f: + pickle.dump(self.entries, f, protocol=pickle.HIGHEST_PROTOCOL) + + def get_features(self, detections: list[Detection]) -> list[Detection]: + """Update detections list with embeddings from the index.""" + for det in detections: + if det.embedding is None: + for e in self.entries: + if e["image_idx"] == det.image_idx and e["det_idx"] == det.class_id: + det.embedding = e["emb"] + break + + return detections + + @classmethod + def load(cls, path: Path) -> FeatureIndex: + with open(path, "rb") as f: + entries = pickle.load(f) + + return cls(entries) + + def get_similar_images(self, ref_emb: np.ndarray, thresh: float) -> set[int]: + """Find images with at least one detection above the threshold.""" + imgs: set[int] = set() + for e in self.entries: + if cosine_similarity(ref_emb, e["emb"]) >= thresh: + imgs.add(e["image_idx"]) + + return imgs diff --git a/ssya/controllers/sam2_wrapper.py b/ssya/controllers/sam2_wrapper.py new file mode 100644 index 0000000..609b41a --- /dev/null +++ b/ssya/controllers/sam2_wrapper.py @@ -0,0 +1,95 @@ +from __future__ import annotations + +import logging +import os +from pathlib import Path + +import cv2 # type: ignore +import numpy as np # type: ignore +import requests +import torch +import torch.nn.functional as F +from sam2.build_sam import build_sam2 +from sam2.sam2_image_predictor import SAM2ImagePredictor + +logger = logging.getLogger(__name__) + + +def gem_pooling(features: torch.Tensor, mask: torch.Tensor, p: float = 3.0): + """ + GeM pooling z maską: features (B, C, H, W), mask (B, 1, H, W) – bool/int. + Zwraca (B, C) + """ + eps = 1e-6 + masked = features * mask # (B, C, H, W) + pooled = F.avg_pool2d(masked.clamp(min=eps).pow(p), kernel_size=masked.shape[-2:]) # (B, C, 1, 1) + pooled = pooled.pow(1.0 / p).squeeze(-1).squeeze(-1) + # uwzględnij liczbę aktywnych pikseli + denom = mask.flatten(2).sum(-1).clamp(min=1e-6) # (B,1) + pooled = pooled / denom + return F.normalize(pooled, dim=-1) + + +class Sam2Runner: + """Light wrapper that exposes mask + embedding for a bbox.""" + + _instance = None # singleton for reuse + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._init_model() + return cls._instance + + # ------------------------------------------------------------------ + + def _init_model(self) -> None: + """Initialize the SAM2 model.""" + model_path = "zoo/sam2_tiny.pth" + if not os.path.exists(model_path): + url = "https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_tiny.pt" + Path(model_path).parent.mkdir(parents=True, exist_ok=True) + logger.info("Downloading SAM2 weights …") + with requests.get(url, stream=True) as r, open(model_path, "wb") as f: + for chunk in r.iter_content(1 << 14): + f.write(chunk) + cfg = "configs/sam2.1/sam2.1_hiera_t.yaml" + device = "cuda" if os.environ.get("CUDA_VISIBLE_DEVICES", "") else "cpu" + model = build_sam2(cfg, model_path).to(device).eval() + self._predictor = SAM2ImagePredictor(model) + self.device = device + + # ------------------------------------------------------------------ + + def mask_and_embed(self, img_bgr: np.ndarray, box_px: tuple[int, int, int, int]) -> tuple[np.ndarray, np.ndarray]: + img_rgb = img_bgr[:, :, ::-1].copy() + self._predictor.set_image(img_rgb) # ← tutaj SAM2 wylicza embedding + + # ---------- segmentacja ---------- + masks, _, _ = self._predictor.predict( + box=np.array([box_px[0], box_px[1], box_px[0] + box_px[2], box_px[1] + box_px[3]]), + multimask_output=False, + return_logits=False, + ) + mask_hr = masks[0] # (H, W) bool + + # ---------- mapa cech ---------- + feat_container = getattr(self._predictor, "_features", None) + if feat_container is None: + raise RuntimeError("Brak _features w predictorze — sprawdź wersję biblioteki") + + # słownik → weź 'image_embed' + feat_map = feat_container.get("image_embed", None) if isinstance(feat_container, dict) else feat_container + + if feat_map is None or not torch.is_tensor(feat_map): + raise RuntimeError("Nie znalazłem tensora z mapą cech w _features") + + B, C, h, w = feat_map.shape + + mask_lr = cv2.resize(mask_hr.astype(np.uint8), (w, h), interpolation=cv2.INTER_NEAREST).astype(bool) + mask_t = torch.from_numpy(mask_lr).to(feat_map.device).view(1, 1, h, w) + + emb_t = gem_pooling(feat_map, mask_t, p=3.0) # z poprzedniej odpowiedzi + emb = emb_t.cpu().numpy()[0] # (C,) + + return mask_hr, emb diff --git a/ssya/helpers/metrics.py b/ssya/helpers/metrics.py new file mode 100644 index 0000000..f7405c9 --- /dev/null +++ b/ssya/helpers/metrics.py @@ -0,0 +1,9 @@ +import numpy as np + + +def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float: + if a is None or b is None: + return 0.0 + if np.linalg.norm(a) == 0 or np.linalg.norm(b) == 0: + return 0.0 + return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))) diff --git a/ssya/main.py b/ssya/main.py index 584e443..20daf7a 100644 --- a/ssya/main.py +++ b/ssya/main.py @@ -1,68 +1,186 @@ +from __future__ import annotations + import argparse import logging +import sys +from pathlib import Path -from yaya_tools import __version__ -from yaya_tools.helpers.annotations import ( - annotations_load_as_sv, -) -from yaya_tools.helpers.dataset import ( - get_images_annotated, - load_directory_images_annotatations, +import numpy as np # type: ignore +from PyQt5.QtCore import Qt +from PyQt5.QtWidgets import ( + QApplication, + QFileDialog, + QLabel, + QListWidget, + QMessageBox, + QPushButton, + QSlider, + QSplitter, + QVBoxLayout, + QWidget, ) +from ssya.controllers.dataset_manager import DatasetManager +from ssya.controllers.sam2_wrapper import Sam2Runner +from ssya.models.detection import Detection +from ssya.widgets.image_viewer import ImageViewer # type: ignore + logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") + + +class MainWindow(QWidget): + def __init__(self, dm: DatasetManager): + super().__init__() + self.dm = dm + self.setWindowTitle("SSYA – embeddings cache & filtering") + + # widgets ------------------------------------------------------ + self.viewer = ImageViewer() + + self.files_list = QListWidget() + self.files_list.addItems(dm.images) + self.dets_list = QListWidget() + + self.slider = QSlider(Qt.Horizontal, minimum=0, maximum=100, value=50) + btn_similar = QPushButton("🔍 Find similar") + btn_clear = QPushButton("❌ Clear filter") + + # layout ------------------------------------------------------- + side = QVBoxLayout() + side.addWidget(QLabel("Images")) + side.addWidget(self.files_list) + side.addWidget(QLabel("Detections")) + side.addWidget(self.dets_list) + side.addWidget(QLabel("Threshold")) + side.addWidget(self.slider) + side.addWidget(btn_similar) + side.addWidget(btn_clear) + side.addStretch() + + splitter = QSplitter() + splitter.addWidget(self.viewer) + side_widget = QWidget() + side_widget.setLayout(side) + splitter.addWidget(side_widget) + splitter.setSizes([800, 300]) + + QVBoxLayout(self).addWidget(splitter) + + # state -------------------------------------------------------- + self.cur_img_idx = 0 + self.selected_mask: list[np.ndarray] = [] + self.selected_detection: Detection | None = None + + # signals ------------------------------------------------------ + self.files_list.currentRowChanged.connect(self.on_file_select) + self.dets_list.currentRowChanged.connect(self.on_det_select) + btn_similar.clicked.connect(self.on_find_similar) + btn_clear.clicked.connect(self.on_clear_filter) + + self.display_image(0) + + # ------------------------------------------------------------------ + + def display_image(self, idx: int) -> None: + """Display image and its detections.""" + self.cur_img_idx = idx + img = self.dm.image(idx) + dets = self.dm.image_detections(idx) + self.dets_list.clear() + for i, d in enumerate(dets): + self.dets_list.addItem(f"#{i} cls={d.class_id}") + self.selected_mask = [] + self.viewer.show_image( + img, + dets, + self.selected_mask, + selected_detection=self.selected_detection, + sim_threshold=self.slider.value() / 100.0, + ) + + # ------------------------------------------------------------------ + + def on_file_select(self, row: int): + if row >= 0: + self.display_image(row) + + # ------------------------------------------------------------------ + + def on_det_select(self, row: int): + if row < 0: + return + det = self.dm.image_detections(self.cur_img_idx)[row] + img = self.dm.image(self.cur_img_idx) + + # Always re-run SAM2 to get the embedding + sam = Sam2Runner() + mask, emb = sam.mask_and_embed(img, det.bbox_pixels(img.shape[1], img.shape[0])) + det.embedding = emb + + self.selected_mask = [mask] + self.viewer.show_image( + img, + self.dm.image_detections(self.cur_img_idx), + self.selected_mask, + selected_detection=det, + sim_threshold=self.slider.value() / 100.0, + ) + + # ------------------------------------------------------------------ + + def on_find_similar(self) -> None: + """Find images with at least one detection above the threshold.""" + row = self.dets_list.currentRow() + if row < 0: + QMessageBox.warning(self, "Select", "Select a detection first") + return + + det = self.dm.image_detections(self.cur_img_idx)[row] + if det.embedding is None: + QMessageBox.warning(self, "No embedding", "Embedding missing – click detection again") + return + + self.selected_detection = det + + thresh = self.slider.value() / 100.0 + keep = self.dm.fidx.get_similar_images(det.embedding, thresh) + + self.files_list.clear() + self.files_list.addItems([self.dm.images[i] for i in sorted(keep)]) + + if keep: + self.files_list.setCurrentRow(0) + + def on_clear_filter(self): + self.files_list.clear() + self.files_list.addItems(self.dm.images) + self.files_list.setCurrentRow(self.cur_img_idx) + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("-i", "--dataset_path", type=Path) + args = ap.parse_args() + + app = QApplication(sys.argv) + ds_path = args.dataset_path + if ds_path is None: + dlg = QFileDialog() + dlg.setFileMode(QFileDialog.Directory) + if dlg.exec_(): + sel = dlg.selectedFiles() + if sel: + ds_path = Path(sel[0]) + if ds_path is None: + sys.exit("Dataset path missing") -def logging_terminal_setup() -> None: - """ - Setup logging for the application. - - Parameters - ---------- - path_field : str - Field in the config file that contains the path to the log file. - Default is "path". - is_terminal : bool - If True, logs will be printed to the terminal. - Default is True. - """ - logging.getLogger().setLevel(logging.DEBUG) # Ensure log level is set to DEBUG - formatter = logging.Formatter("%(asctime)s %(levelname)s: %(message)s") - console = logging.StreamHandler() - console.setLevel(logging.DEBUG) - console.setFormatter(formatter) - logging.getLogger().addHandler(console) - logger.info("\n\n###### Logging start of terminal session ######\n") - - -def main() -> None: - """ - Main function for dataset management - - Returns - ------- - None - """ - logging_terminal_setup() - - # Argument parser - parser = argparse.ArgumentParser(add_help=False, description="YAYa dataset management tool") - parser.add_argument("-i", "--dataset_path", type=str, required=True, help="Path to the dataset folder") - parser.add_argument("-h", "--help", action="help", help="Show this help message and exit.") - parser.add_argument("-v", action="version", version=__version__, help="Show version and exit.") - args = parser.parse_args() - - dataset_path = args.dataset_path - - # All images : with optional annotation filename - all_images_annotations: dict[str, str | None] = load_directory_images_annotatations(dataset_path) - # Images annotated : Filter only - images_annotated: list[str] = get_images_annotated(all_images_annotations) - - # All annotations as SV : Get - all_annotations_sv, all_negatives = annotations_load_as_sv(all_images_annotations, dataset_path) - - # TODO + dm = DatasetManager(ds_path) + win = MainWindow(dm) + win.resize(1400, 800) + win.show() + sys.exit(app.exec_()) if __name__ == "__main__": diff --git a/ssya/models/detection.py b/ssya/models/detection.py new file mode 100644 index 0000000..0da8e37 --- /dev/null +++ b/ssya/models/detection.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass + +import numpy as np + + +@dataclass +class Detection: + class_id: int + bbox_norm: tuple[float, float, float, float] + image_idx: int # index in DatasetManager.images + embedding: np.ndarray | None = None + + def bbox_pixels(self, img_w: int, img_h: int) -> tuple[int, int, int, int]: + xc, yc, w, h = self.bbox_norm + w_px, h_px = int(w * img_w), int(h * img_h) + x1 = int((xc * img_w) - w_px / 2) + y1 = int((yc * img_h) - h_px / 2) + return x1, y1, w_px, h_px diff --git a/ssya/widgets/image_viewer.py b/ssya/widgets/image_viewer.py new file mode 100644 index 0000000..4de4721 --- /dev/null +++ b/ssya/widgets/image_viewer.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import logging + +import cv2 # type: ignore +import numpy as np # type: ignore +from PyQt5.QtCore import Qt +from PyQt5.QtGui import QImage, QPixmap +from PyQt5.QtWidgets import ( + QLabel, + QSizePolicy, + QVBoxLayout, + QWidget, +) + +from ssya.helpers.metrics import cosine_similarity +from ssya.models.detection import Detection # type: ignore + +logger = logging.getLogger(__name__) + + +class ImageViewer(QWidget): + """Widget to display images with detections and masks.""" + + def __init__(self): + super().__init__() + self.lbl = QLabel(alignment=Qt.AlignCenter) + self.lbl.setSizePolicy(QSizePolicy.Expanding, QSizePolicy.Expanding) + self.lbl.setScaledContents(True) + # Fixed with and height set, 1920 - 200 , 1080 - 200 + self.lbl.setFixedWidth(1620) + self.lbl.setFixedHeight(880) + + QVBoxLayout(self).addWidget(self.lbl) + + def show_image( + self, + img_bgr: np.ndarray, + dets: list[Detection], + masks: list[np.ndarray], + selected_detection: Detection | None = None, + sim_threshold: float = 0.5, + ): + """Display image with detections and masks.""" + if img_bgr is None: + return + + disp = img_bgr.copy() + h, w, _ = disp.shape + + for m in masks: + disp[m > 0] = (disp[m > 0] * 0.4 + np.array([255, 0, 0]) * 0.6).astype(np.uint8) + + for d in dets: + x, y, bw, bh = d.bbox_pixels(w, h) + cv2.rectangle(disp, (x, y), (x + bw, y + bh), (255, 0, 0), 1) + cv2.putText(disp, str(d.class_id), (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1) + + if selected_detection is not None: + for det in dets: + sim = cosine_similarity(selected_detection.embedding, det.embedding) + x, y, bw, bh = det.bbox_pixels(w, h) + cv2.putText(disp, f"{sim:.2f}", (x, y - 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2) + if sim < sim_threshold: + continue + + cv2.rectangle(disp, (x, y), (x + bw, y + bh), (0, 255, 0), thickness=2) + + rgb = cv2.cvtColor(disp, cv2.COLOR_BGR2RGB) + qimg = QImage(rgb.data, w, h, QImage.Format_RGB888) + self.lbl.setPixmap(QPixmap.fromImage(qimg)) diff --git a/uv.lock b/uv.lock index 354a1ea..002dfa4 100644 --- a/uv.lock +++ b/uv.lock @@ -43,6 +43,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 }, ] +[[package]] +name = "antlr4-python3-runtime" +version = "4.9.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/3e/38/7859ff46355f76f8d19459005ca000b6e7012f2f1ca597746cbcd1fbfe5e/antlr4-python3-runtime-4.9.3.tar.gz", hash = "sha256:f224469b4168294902bb1efa80a8bf7855f24c99aef99cbefc1bcd3cce77881b", size = 117034 } + [[package]] name = "certifi" version = "2025.7.9" @@ -205,6 +211,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/d4/1d85a1996b6188cd2713230e002d79a6f3a289bb17cef600cba385848b72/fonttools-4.58.5-py3-none-any.whl", hash = "sha256:e48a487ed24d9b611c5c4b25db1e50e69e9854ca2670e39a3486ffcd98863ec4", size = 1115318 }, ] +[[package]] +name = "fsspec" +version = "2025.5.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/00/f7/27f15d41f0ed38e8fcc488584b57e902b331da7f7c6dcda53721b15838fc/fsspec-2025.5.1.tar.gz", hash = "sha256:2e55e47a540b91843b755e83ded97c6e897fa0942b11490113f09e9c443c2475", size = 303033 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bb/61/78c7b3851add1481b048b5fdc29067397a1784e2910592bc81bb3f608635/fsspec-2025.5.1-py3-none-any.whl", hash = "sha256:24d3a2e663d5fc735ab256263c4075f374a174c3410c0b25e5bd1970bceaa462", size = 199052 }, +] + +[[package]] +name = "hydra-core" +version = "1.3.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "antlr4-python3-runtime" }, + { name = "omegaconf" }, + { name = "packaging" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6d/8e/07e42bc434a847154083b315779b0a81d567154504624e181caf2c71cd98/hydra-core-1.3.2.tar.gz", hash = "sha256:8a878ed67216997c3e9d88a8e72e7b4767e81af37afb4ea3334b269a4390a824", size = 3263494 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/50/e0edd38dcd63fb26a8547f13d28f7a008bc4a3fd4eb4ff030673f22ad41a/hydra_core-1.3.2-py3-none-any.whl", hash = "sha256:fa0238a9e31df3373b35b0bfb672c34cc92718d21f81311d8996a16de1141d8b", size = 154547 }, +] + [[package]] name = "identify" version = "2.6.10" @@ -232,6 +261,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050 }, ] +[[package]] +name = "iopath" +version = "0.1.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "portalocker" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/73/b3d451dfc523756cf177d3ebb0af76dc7751b341c60e2a21871be400ae29/iopath-0.1.10.tar.gz", hash = "sha256:3311c16a4d9137223e20f141655759933e1eda24f8bff166af834af3c645ef01", size = 42226 } + +[[package]] +name = "jinja2" +version = "3.1.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/df/bf/f7da0350254c0ed7c72f3e33cef02e048281fec7ecec5f032d4aac52226b/jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d", size = 245115 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899 }, +] + [[package]] name = "kiwisolver" version = "1.4.8" @@ -255,6 +307,24 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4a/c9/9642ea855604aeb2968a8e145fc662edf61db7632ad2e4fb92424be6b6c0/kiwisolver-1.4.8-cp311-cp311-win_arm64.whl", hash = "sha256:16523b40aab60426ffdebe33ac374457cf62863e330a90a0383639ce14bf44b2", size = 65311 }, ] +[[package]] +name = "markupsafe" +version = "3.0.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353 }, + { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392 }, + { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984 }, + { url = "https://files.pythonhosted.org/packages/f1/a4/aefb044a2cd8d7334c8a47d3fb2c9f328ac48cb349468cc31c20b539305f/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a123e330ef0853c6e822384873bef7507557d8e4a082961e1defa947aa59ba84", size = 23120 }, + { url = "https://files.pythonhosted.org/packages/8d/21/5e4851379f88f3fad1de30361db501300d4f07bcad047d3cb0449fc51f8c/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e084f686b92e5b83186b07e8a17fc09e38fff551f3602b249881fec658d3eca", size = 23032 }, + { url = "https://files.pythonhosted.org/packages/00/7b/e92c64e079b2d0d7ddf69899c98842f3f9a60a1ae72657c89ce2655c999d/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d8213e09c917a951de9d09ecee036d5c7d36cb6cb7dbaece4c71a60d79fb9798", size = 24057 }, + { url = "https://files.pythonhosted.org/packages/f9/ac/46f960ca323037caa0a10662ef97d0a4728e890334fc156b9f9e52bcc4ca/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:5b02fb34468b6aaa40dfc198d813a641e3a63b98c2b05a16b9f80b7ec314185e", size = 23359 }, + { url = "https://files.pythonhosted.org/packages/69/84/83439e16197337b8b14b6a5b9c2105fff81d42c2a7c5b58ac7b62ee2c3b1/MarkupSafe-3.0.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4", size = 23306 }, + { url = "https://files.pythonhosted.org/packages/9a/34/a15aa69f01e2181ed8d2b685c0d2f6655d5cca2c4db0ddea775e631918cd/MarkupSafe-3.0.2-cp311-cp311-win32.whl", hash = "sha256:6c89876f41da747c8d3677a2b540fb32ef5715f97b66eeb0c6b66f5e3ef6f59d", size = 15094 }, + { url = "https://files.pythonhosted.org/packages/da/b8/3a3bd761922d416f3dc5d00bfbed11f66b1ab89a0c2b6e887240a30b0f6b/MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:70a87b411535ccad5ef2f1df5136506a10775d267e197e4cf531ced10537bd6b", size = 15521 }, +] + [[package]] name = "matplotlib" version = "3.10.3" @@ -280,6 +350,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/da/948a017c3ea13fd4a97afad5fdebe2f5bbc4d28c0654510ce6fd6b06b7bd/matplotlib-3.10.3-cp311-cp311-win_amd64.whl", hash = "sha256:eef6ed6c03717083bc6d69c2d7ee8624205c29a8e6ea5a31cd3492ecdbaee1e1", size = 8065492 }, ] +[[package]] +name = "mpmath" +version = "1.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e0/47/dd32fa426cc72114383ac549964eecb20ecfd886d1e5ccf5340b55b02f57/mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f", size = 508106 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 }, +] + [[package]] name = "mypy" version = "1.15.0" @@ -308,6 +387,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 }, ] +[[package]] +name = "networkx" +version = "3.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/4f/ccdb8ad3a38e583f214547fd2f7ff1fc160c43a75af88e6aec213404b96a/networkx-3.5.tar.gz", hash = "sha256:d4c6f9cf81f52d69230866796b82afbccdec3db7ae4fbd1b65ea750feed50037", size = 2471065 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl", hash = "sha256:0030d386a9a06dee3565298b4a734b68589749a544acbb6c412dc9e2489ec6ec", size = 2034406 }, +] + [[package]] name = "nodeenv" version = "1.9.1" @@ -335,6 +423,152 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/94/7a/4c00332a3ca79702bbc86228afd0e84e6f91b47222ec8cdf00677dd16481/numpy-2.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:ae8ce252404cdd4de56dcfce8b11eac3c594a9c16c231d081fb705cf23bd4d9e", size = 12870550 }, ] +[[package]] +name = "nvidia-cublas-cu12" +version = "12.6.4.1" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/af/eb/ff4b8c503fa1f1796679dce648854d58751982426e4e4b37d6fce49d259c/nvidia_cublas_cu12-12.6.4.1-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:08ed2686e9875d01b58e3cb379c6896df8e76c75e0d4a7f7dace3d7b6d9ef8eb", size = 393138322 }, +] + +[[package]] +name = "nvidia-cuda-cupti-cu12" +version = "12.6.80" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/49/60/7b6497946d74bcf1de852a21824d63baad12cd417db4195fc1bfe59db953/nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:6768bad6cab4f19e8292125e5f1ac8aa7d1718704012a0e3272a6f61c4bce132", size = 8917980 }, + { url = "https://files.pythonhosted.org/packages/a5/24/120ee57b218d9952c379d1e026c4479c9ece9997a4fb46303611ee48f038/nvidia_cuda_cupti_cu12-12.6.80-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a3eff6cdfcc6a4c35db968a06fcadb061cbc7d6dde548609a941ff8701b98b73", size = 8917972 }, +] + +[[package]] +name = "nvidia-cuda-nvrtc-cu12" +version = "12.6.77" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/75/2e/46030320b5a80661e88039f59060d1790298b4718944a65a7f2aeda3d9e9/nvidia_cuda_nvrtc_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:35b0cc6ee3a9636d5409133e79273ce1f3fd087abb0532d2d2e8fff1fe9efc53", size = 23650380 }, +] + +[[package]] +name = "nvidia-cuda-runtime-cu12" +version = "12.6.77" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/23/e717c5ac26d26cf39a27fbc076240fad2e3b817e5889d671b67f4f9f49c5/nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ba3b56a4f896141e25e19ab287cd71e52a6a0f4b29d0d31609f60e3b4d5219b7", size = 897690 }, + { url = "https://files.pythonhosted.org/packages/f0/62/65c05e161eeddbafeca24dc461f47de550d9fa8a7e04eb213e32b55cfd99/nvidia_cuda_runtime_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a84d15d5e1da416dd4774cb42edf5e954a3e60cc945698dc1d5be02321c44dc8", size = 897678 }, +] + +[[package]] +name = "nvidia-cudnn-cu12" +version = "9.5.1.17" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/78/4535c9c7f859a64781e43c969a3a7e84c54634e319a996d43ef32ce46f83/nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2", size = 570988386 }, +] + +[[package]] +name = "nvidia-cufft-cu12" +version = "11.3.0.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/16/73727675941ab8e6ffd86ca3a4b7b47065edcca7a997920b831f8147c99d/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5", size = 200221632 }, + { url = "https://files.pythonhosted.org/packages/60/de/99ec247a07ea40c969d904fc14f3a356b3e2a704121675b75c366b694ee1/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.whl", hash = "sha256:768160ac89f6f7b459bee747e8d175dbf53619cfe74b2a5636264163138013ca", size = 200221622 }, +] + +[[package]] +name = "nvidia-cufile-cu12" +version = "1.11.1.6" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b2/66/cc9876340ac68ae71b15c743ddb13f8b30d5244af344ec8322b449e35426/nvidia_cufile_cu12-1.11.1.6-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:cc23469d1c7e52ce6c1d55253273d32c565dd22068647f3aa59b3c6b005bf159", size = 1142103 }, +] + +[[package]] +name = "nvidia-curand-cu12" +version = "10.3.7.77" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/1b/44a01c4e70933637c93e6e1a8063d1e998b50213a6b65ac5a9169c47e98e/nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a42cd1344297f70b9e39a1e4f467a4e1c10f1da54ff7a85c12197f6c652c8bdf", size = 56279010 }, + { url = "https://files.pythonhosted.org/packages/4a/aa/2c7ff0b5ee02eaef890c0ce7d4f74bc30901871c5e45dee1ae6d0083cd80/nvidia_curand_cu12-10.3.7.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:99f1a32f1ac2bd134897fc7a203f779303261268a65762a623bf30cc9fe79117", size = 56279000 }, +] + +[[package]] +name = "nvidia-cusolver-cu12" +version = "11.7.1.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cusparse-cu12" }, + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f0/6e/c2cf12c9ff8b872e92b4a5740701e51ff17689c4d726fca91875b07f655d/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c", size = 158229790 }, + { url = "https://files.pythonhosted.org/packages/9f/81/baba53585da791d043c10084cf9553e074548408e04ae884cfe9193bd484/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6cf28f17f64107a0c4d7802be5ff5537b2130bfc112f25d5a30df227058ca0e6", size = 158229780 }, +] + +[[package]] +name = "nvidia-cusparse-cu12" +version = "12.5.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "nvidia-nvjitlink-cu12" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/06/1e/b8b7c2f4099a37b96af5c9bb158632ea9e5d9d27d7391d7eb8fc45236674/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73", size = 216561367 }, + { url = "https://files.pythonhosted.org/packages/43/ac/64c4316ba163e8217a99680c7605f779accffc6a4bcd0c778c12948d3707/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.whl", hash = "sha256:23749a6571191a215cb74d1cdbff4a86e7b19f1200c071b3fcf844a5bea23a2f", size = 216561357 }, +] + +[[package]] +name = "nvidia-cusparselt-cu12" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/9a/72ef35b399b0e183bc2e8f6f558036922d453c4d8237dab26c666a04244b/nvidia_cusparselt_cu12-0.6.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:e5c8a26c36445dd2e6812f1177978a24e2d37cacce7e090f297a688d1ec44f46", size = 156785796 }, +] + +[[package]] +name = "nvidia-nccl-cu12" +version = "2.26.2" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/ca/f42388aed0fddd64ade7493dbba36e1f534d4e6fdbdd355c6a90030ae028/nvidia_nccl_cu12-2.26.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:694cf3879a206553cc9d7dbda76b13efaf610fdb70a50cba303de1b0d1530ac6", size = 201319755 }, +] + +[[package]] +name = "nvidia-nvjitlink-cu12" +version = "12.6.85" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9d/d7/c5383e47c7e9bf1c99d5bd2a8c935af2b6d705ad831a7ec5c97db4d82f4f/nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a", size = 19744971 }, +] + +[[package]] +name = "nvidia-nvtx-cu12" +version = "12.6.77" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/56/9a/fff8376f8e3d084cd1530e1ef7b879bb7d6d265620c95c1b322725c694f4/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b90bed3df379fa79afbd21be8e04a0314336b8ae16768b58f2d34cb1d04cd7d2", size = 89276 }, + { url = "https://files.pythonhosted.org/packages/9e/4e/0d0c945463719429b7bd21dece907ad0bde437a2ff12b9b12fee94722ab0/nvidia_nvtx_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:6574241a3ec5fdc9334353ab8c479fe75841dbe8f4532a8fc97ce63503330ba1", size = 89265 }, +] + +[[package]] +name = "omegaconf" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "antlr4-python3-runtime" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/48/6388f1bb9da707110532cb70ec4d2822858ddfb44f1cdf1233c20a80ea4b/omegaconf-2.3.0.tar.gz", hash = "sha256:d5d4b6d29955cc50ad50c46dc269bcd92c6e00f5f90d23ab5fee7bfca4ba4cc7", size = 3298120 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e3/94/1843518e420fa3ed6919835845df698c7e27e183cb997394e4a670973a65/omegaconf-2.3.0-py3-none-any.whl", hash = "sha256:7b4df175cdb08ba400f45cae3bdcae7ba8365db4d165fc65fd04b050ab63b46b", size = 79500 }, +] + [[package]] name = "opencv-python" version = "4.12.0.88" @@ -455,6 +689,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/88/5f/e351af9a41f866ac3f1fac4ca0613908d9a41741cfcf2228f4ad853b697d/pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669", size = 20556 }, ] +[[package]] +name = "portalocker" +version = "3.2.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pywin32", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/77/65b857a69ed876e1951e88aaba60f5ce6120c33703f7cb61a3c894b8c1b6/portalocker-3.2.0.tar.gz", hash = "sha256:1f3002956a54a8c3730586c5c77bf18fae4149e07eaf1c29fc3faf4d5a3f89ac", size = 95644 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4b/a6/38c8e2f318bf67d338f4d629e93b0b4b9af331f455f0390ea8ce4a099b26/portalocker-3.2.0-py3-none-any.whl", hash = "sha256:3cdc5f565312224bc570c49337bd21428bba0ef363bbcf58b9ef4a9f11779968", size = 22424 }, +] + [[package]] name = "pre-commit" version = "4.2.0" @@ -625,6 +871,16 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 }, ] +[[package]] +name = "pywin32" +version = "310" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f7/b1/68aa2986129fb1011dabbe95f0136f44509afaf072b12b8f815905a39f33/pywin32-310-cp311-cp311-win32.whl", hash = "sha256:1e765f9564e83011a63321bb9d27ec456a0ed90d3732c4b2e312b855365ed8bd", size = 8784284 }, + { url = "https://files.pythonhosted.org/packages/b3/bd/d1592635992dd8db5bb8ace0551bc3a769de1ac8850200cfa517e72739fb/pywin32-310-cp311-cp311-win_amd64.whl", hash = "sha256:126298077a9d7c95c53823934f000599f66ec9296b09167810eb24875f32689c", size = 9520748 }, + { url = "https://files.pythonhosted.org/packages/90/b1/ac8b1ffce6603849eb45a91cf126c0fa5431f186c2e768bf56889c46f51c/pywin32-310-cp311-cp311-win_arm64.whl", hash = "sha256:19ec5fc9b1d51c4350be7bb00760ffce46e6c95eaf2f0b2f1150657b1a43c582", size = 8455941 }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -682,6 +938,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cd/be/f6b790d6ae98f1f32c645f8540d5c96248b72343b0a56fab3a07f2941897/ruff-0.11.8-py3-none-win_arm64.whl", hash = "sha256:304432e4c4a792e3da85b7699feb3426a0908ab98bf29df22a31b0cdd098fac2", size = 10713129 }, ] +[[package]] +name = "sam2" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "hydra-core" }, + { name = "iopath" }, + { name = "numpy" }, + { name = "pillow" }, + { name = "torch" }, + { name = "torchvision" }, + { name = "tqdm" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ce/11/d07fc96688f731a85de6d5260e98b709051eded2b7b5667ae292530bcf90/sam2-1.1.0.tar.gz", hash = "sha256:7e0ea252d43c10d853e3acfce0b5770ac683c30481bd6de311300e9d44f45b74", size = 152836 } + [[package]] name = "scipy" version = "1.16.0" @@ -728,6 +999,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/83/11/00d3c3dfc25ad54e731d91449895a79e4bf2384dc3ac01809010ba88f6d5/seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987", size = 294914 }, ] +[[package]] +name = "setuptools" +version = "80.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/18/5d/3bf57dcd21979b887f014ea83c24ae194cfcd12b9e0fda66b957c69d1fca/setuptools-80.9.0.tar.gz", hash = "sha256:f36b47402ecde768dbfafc46e8e4207b4360c654f1f3bb84475f0a28628fb19c", size = 1319958 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486 }, +] + [[package]] name = "simsimd" version = "6.5.0" @@ -764,6 +1044,7 @@ name = "ssya" source = { editable = "." } dependencies = [ { name = "dotenv" }, + { name = "sam2" }, { name = "yaya-tools" }, ] @@ -782,6 +1063,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "dotenv", specifier = ">=0.9.9" }, + { name = "sam2", specifier = ">=1.1.0" }, { name = "yaya-tools", url = "https://github.com/AISP-PL/yaya-tools/releases/download/v1.2.16/yaya_tools-1.1.0-py3-none-any.whl" }, ] @@ -843,6 +1125,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/24/d3bcad7ece751166ed308c6deb7e7d02a62a7f5a6e01e61ff2787c538fb0/supervision-0.25.1-py3-none-any.whl", hash = "sha256:ebc015c22983bc64563beda75f5f529e465e4020b318da07948ce03148307a72", size = 181480 }, ] +[[package]] +name = "sympy" +version = "1.14.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mpmath" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/83/d3/803453b36afefb7c2bb238361cd4ae6125a569b4db67cd9e79846ba2d68c/sympy-1.14.0.tar.gz", hash = "sha256:d3d3fe8df1e5a0b42f0e7bdf50541697dbe7d23746e894990c030e2b05e72517", size = 7793921 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a2/09/77d55d46fd61b4a135c444fc97158ef34a095e5681d0a6c10b75bf356191/sympy-1.14.0-py3-none-any.whl", hash = "sha256:e091cc3e99d2141a0ba2847328f5479b05d94a6635cb96148ccb3f34671bd8f5", size = 6299353 }, +] + [[package]] name = "taplo" version = "0.9.3" @@ -876,6 +1170,56 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, ] +[[package]] +name = "torch" +version = "2.7.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "filelock" }, + { name = "fsspec" }, + { name = "jinja2" }, + { name = "networkx" }, + { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cufile-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-cusparselt-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "sympy" }, + { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, + { name = "typing-extensions" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/56/2eae3494e3d375533034a8e8cf0ba163363e996d85f0629441fa9d9843fe/torch-2.7.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:236f501f2e383f1cb861337bdf057712182f910f10aeaf509065d54d339e49b2", size = 99093039 }, + { url = "https://files.pythonhosted.org/packages/e5/94/34b80bd172d0072c9979708ccd279c2da2f55c3ef318eceec276ab9544a4/torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:06eea61f859436622e78dd0cdd51dbc8f8c6d76917a9cf0555a333f9eac31ec1", size = 821174704 }, + { url = "https://files.pythonhosted.org/packages/50/9e/acf04ff375b0b49a45511c55d188bcea5c942da2aaf293096676110086d1/torch-2.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:8273145a2e0a3c6f9fd2ac36762d6ee89c26d430e612b95a99885df083b04e52", size = 216095937 }, + { url = "https://files.pythonhosted.org/packages/5b/2b/d36d57c66ff031f93b4fa432e86802f84991477e522adcdffd314454326b/torch-2.7.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:aea4fc1bf433d12843eb2c6b2204861f43d8364597697074c8d38ae2507f8730", size = 68640034 }, +] + +[[package]] +name = "torchvision" +version = "0.22.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, + { name = "pillow" }, + { name = "torch" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/00/bdab236ef19da050290abc2b5203ff9945c84a1f2c7aab73e8e9c8c85669/torchvision-0.22.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4addf626e2b57fc22fd6d329cf1346d474497672e6af8383b7b5b636fba94a53", size = 1947827 }, + { url = "https://files.pythonhosted.org/packages/ac/d0/18f951b2be3cfe48c0027b349dcc6fde950e3dc95dd83e037e86f284f6fd/torchvision-0.22.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:8b4a53a6067d63adba0c52f2b8dd2290db649d642021674ee43c0c922f0c6a69", size = 2514021 }, + { url = "https://files.pythonhosted.org/packages/c3/1a/63eb241598b36d37a0221e10af357da34bd33402ccf5c0765e389642218a/torchvision-0.22.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:b7866a3b326413e67724ac46f1ee594996735e10521ba9e6cdbe0fa3cd98c2f2", size = 7487300 }, + { url = "https://files.pythonhosted.org/packages/e5/73/1b009b42fe4a7774ba19c23c26bb0f020d68525c417a348b166f1c56044f/torchvision-0.22.1-cp311-cp311-win_amd64.whl", hash = "sha256:bb3f6df6f8fd415ce38ec4fd338376ad40c62e86052d7fc706a0dd51efac1718", size = 1707989 }, +] + [[package]] name = "tqdm" version = "4.67.1" @@ -888,6 +1232,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 }, ] +[[package]] +name = "triton" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "setuptools" }, +] +wheels = [ + { url = "https://files.pythonhosted.org/packages/21/2f/3e56ea7b58f80ff68899b1dbe810ff257c9d177d288c6b0f55bf2fe4eb50/triton-3.3.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b31e3aa26f8cb3cc5bf4e187bf737cbacf17311e1112b781d4a059353dfd731b", size = 155689937 }, +] + [[package]] name = "types-requests" version = "2.32.0.20250328" diff --git a/zoo/.gitkeep b/zoo/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/zoo/.gitkepp b/zoo/.gitkepp new file mode 100644 index 0000000..e69de29