From 077f4caf332f3db5d6de1baef56cea9bf2f77800 Mon Sep 17 00:00:00 2001 From: imagra93 Date: Wed, 3 Jun 2026 08:35:30 +0000 Subject: [PATCH 1/4] fix(seg): filter unlabeled images and fix mask-loss DDP graph disconnect --- libreyolo/data/dataset.py | 24 ++++++++++++++++++++++++ libreyolo/models/rfdetr/loss.py | 10 +++++++--- libreyolo/training/trainer.py | 3 +++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/libreyolo/data/dataset.py b/libreyolo/data/dataset.py index 570fdcc9..254ea6be 100644 --- a/libreyolo/data/dataset.py +++ b/libreyolo/data/dataset.py @@ -160,6 +160,7 @@ def __init__( img_files: List[Path] | None = None, label_files: List[Path] | None = None, load_segments: bool = False, + filter_empty_annotations: bool = False, ): """ Initialize YOLO dataset. @@ -171,11 +172,15 @@ def __init__( preproc: Preprocessing transform. img_files: List of image paths (for file list mode). label_files: List of label paths (optional, inferred if not provided). + filter_empty_annotations: Drop images with no annotations and log a warning. + Should be True for segmentation training where unlabeled images provide + no gradient signal and can cause DDP static_graph crashes. """ self.img_size = img_size self.preproc = preproc self._input_dim = img_size self.load_segments = load_segments + self._filter_empty_annotations = filter_empty_annotations if img_files is not None: # File list mode (.txt format) @@ -225,6 +230,25 @@ def __init__( # Pre-load annotations self.annotations = self._load_annotations() + if self._filter_empty_annotations: + keep = [i for i, ann in enumerate(self.annotations) if ann[0].shape[0] > 0] + dropped = self.num_imgs - len(keep) + if dropped > 0 and is_main_process(): + logger.warning( + "Dropped %d image(s) with no annotations (filter_empty_annotations=True). " + "%d image(s) remain.", + dropped, + len(keep), + ) + self.annotations = [self.annotations[i] for i in keep] + self.img_files = [self.img_files[i] for i in keep] + self.label_files = [self.label_files[i] for i in keep] + if self.segments is not None: + self.segments = [self.segments[i] for i in keep] + self.num_imgs = len(self.annotations) + if self.num_imgs == 0: + raise ValueError("No labeled images remain after filtering empty annotations.") + def _load_annotations(self) -> List: """Load all annotations.""" total = len(self.img_files) diff --git a/libreyolo/models/rfdetr/loss.py b/libreyolo/models/rfdetr/loss.py index 2d41b65e..33daea21 100644 --- a/libreyolo/models/rfdetr/loss.py +++ b/libreyolo/models/rfdetr/loss.py @@ -394,10 +394,14 @@ def loss_masks(self, outputs, targets, indices, num_boxes): spatial_features = outputs["pred_masks"]["spatial_features"] query_features = outputs["pred_masks"]["query_features"] bias = outputs["pred_masks"]["bias"] - # If there are no matches, return an empty tensor like the Tensor branch does. if idx[0].numel() == 0: - device = spatial_features.device - src_masks = torch.tensor([], device=device) + # Return zero losses that ARE connected to the mask head tensors so + # every mask-head parameter still receives a (zero) gradient. + # torch.tensor([]) has no grad_fn and silently drops those params from + # the backward graph, which violates DDP static_graph=True and causes + # a "finished reduction" crash whenever a rank sees an all-unlabeled batch. + zero = spatial_features.sum() * 0.0 + query_features.sum() * 0.0 + bias * 0.0 + return {"loss_mask_ce": zero, "loss_mask_dice": zero} else: batched_selected_masks = [] per_batch_counts = idx[0].unique(return_counts=True)[1] diff --git a/libreyolo/training/trainer.py b/libreyolo/training/trainer.py index bc5a266d..e44d5fd5 100644 --- a/libreyolo/training/trainer.py +++ b/libreyolo/training/trainer.py @@ -386,6 +386,7 @@ def _setup_data(self): img_size=img_size, preproc=preproc, load_segments=load_segments, + filter_empty_annotations=load_segments, ) elif ann_file.exists(): train_dataset = COCODataset( @@ -418,6 +419,7 @@ def _setup_data(self): img_size=img_size, preproc=preproc, load_segments=load_segments, + filter_empty_annotations=load_segments, ) elif self.config.data_dir: data_dir = self.config.data_dir @@ -439,6 +441,7 @@ def _setup_data(self): img_size=img_size, preproc=preproc, load_segments=load_segments, + filter_empty_annotations=load_segments, ) else: raise ValueError("Either 'data' or 'data_dir' must be specified") From 1c4ddb072eceee1332a8419b98edc483c285a340 Mon Sep 17 00:00:00 2001 From: imagra93 Date: Wed, 3 Jun 2026 09:05:32 +0000 Subject: [PATCH 2/4] test(dataset): cover filter_empty_annotations behavior on YOLODataset --- tests/unit/test_dataset_loading.py | 66 ++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/tests/unit/test_dataset_loading.py b/tests/unit/test_dataset_loading.py index bc5657b7..d95dd6f0 100644 --- a/tests/unit/test_dataset_loading.py +++ b/tests/unit/test_dataset_loading.py @@ -1,5 +1,6 @@ """Tests for dataset annotation loading.""" +import logging import numpy as np import pytest from pathlib import Path @@ -101,6 +102,71 @@ def test_create_dataloader_drop_last_only_when_safe( assert len(loader) == expected_batches +def _build_mixed_dataset(tmp_path, label_contents, **kwargs): + """Create a YOLODataset from a list of label file bodies (empty string = unlabeled).""" + image_dir = tmp_path / "images" + label_dir = tmp_path / "labels" + image_dir.mkdir(exist_ok=True) + label_dir.mkdir(exist_ok=True) + + img_files, label_files = [], [] + for i, body in enumerate(label_contents): + img = image_dir / f"img_{i}.jpg" + lbl = label_dir / f"img_{i}.txt" + Image.new("RGB", (64, 64), color="white").save(img) + lbl.write_text(body) + img_files.append(img) + label_files.append(lbl) + + return YOLODataset(img_files=img_files, label_files=label_files, img_size=(64, 64), **kwargs) + + +def test_filter_empty_annotations_drops_unlabeled_images(tmp_path): + labeled = "0 0.5 0.5 0.25 0.5\n" + ds = _build_mixed_dataset(tmp_path, [labeled, "", labeled], filter_empty_annotations=True) + + assert ds.num_imgs == 2 + assert len(ds.annotations) == 2 + assert len(ds.img_files) == 2 + assert len(ds.label_files) == 2 + assert all(ann[0].shape[0] > 0 for ann in ds.annotations) + assert [f.name for f in ds.img_files] == ["img_0.jpg", "img_2.jpg"] + + +def test_filter_empty_annotations_logs_warning(tmp_path, caplog): + labeled = "0 0.5 0.5 0.25 0.5\n" + with caplog.at_level(logging.WARNING, logger="libreyolo.data.dataset"): + _build_mixed_dataset(tmp_path, [labeled, "", ""], filter_empty_annotations=True) + + warnings = [r for r in caplog.records if r.levelno == logging.WARNING] + assert len(warnings) == 1 + msg = warnings[0].getMessage() + assert "2" in msg and "1" in msg # 2 dropped, 1 remain + + +def test_filter_empty_annotations_raises_when_all_unlabeled(tmp_path): + with pytest.raises(ValueError, match="No labeled images remain"): + _build_mixed_dataset(tmp_path, ["", ""], filter_empty_annotations=True) + + +def test_filter_empty_annotations_false_keeps_unlabeled(tmp_path): + labeled = "0 0.5 0.5 0.25 0.5\n" + ds = _build_mixed_dataset(tmp_path, [labeled, ""], filter_empty_annotations=False) + + assert ds.num_imgs == 2 + + +def test_filter_empty_annotations_syncs_segments(tmp_path): + # Polygon label: class + 4 xy pairs (> 5 parts triggers segment parsing) + poly = "0 0.1 0.1 0.9 0.1 0.9 0.9 0.1 0.9\n" + ds = _build_mixed_dataset( + tmp_path, [poly, "", poly], filter_empty_annotations=True, load_segments=True + ) + + assert ds.num_imgs == 2 + assert len(ds.segments) == 2 + + def test_create_dataloader_uses_sampler_visible_size(): sampler = SubsetRandomSampler([0, 1]) loader = create_dataloader( From 1c277c67c331867d1f458aabf99516ce10e99686 Mon Sep 17 00:00:00 2001 From: imagra93 Date: Fri, 5 Jun 2026 07:40:08 +0000 Subject: [PATCH 3/4] fix(seg): remove filter_empty_annotations, rely on zero-grad loss fix --- libreyolo/data/dataset.py | 24 ----------- libreyolo/training/trainer.py | 3 -- tests/unit/test_dataset_loading.py | 66 ------------------------------ 3 files changed, 93 deletions(-) diff --git a/libreyolo/data/dataset.py b/libreyolo/data/dataset.py index 254ea6be..570fdcc9 100644 --- a/libreyolo/data/dataset.py +++ b/libreyolo/data/dataset.py @@ -160,7 +160,6 @@ def __init__( img_files: List[Path] | None = None, label_files: List[Path] | None = None, load_segments: bool = False, - filter_empty_annotations: bool = False, ): """ Initialize YOLO dataset. @@ -172,15 +171,11 @@ def __init__( preproc: Preprocessing transform. img_files: List of image paths (for file list mode). label_files: List of label paths (optional, inferred if not provided). - filter_empty_annotations: Drop images with no annotations and log a warning. - Should be True for segmentation training where unlabeled images provide - no gradient signal and can cause DDP static_graph crashes. """ self.img_size = img_size self.preproc = preproc self._input_dim = img_size self.load_segments = load_segments - self._filter_empty_annotations = filter_empty_annotations if img_files is not None: # File list mode (.txt format) @@ -230,25 +225,6 @@ def __init__( # Pre-load annotations self.annotations = self._load_annotations() - if self._filter_empty_annotations: - keep = [i for i, ann in enumerate(self.annotations) if ann[0].shape[0] > 0] - dropped = self.num_imgs - len(keep) - if dropped > 0 and is_main_process(): - logger.warning( - "Dropped %d image(s) with no annotations (filter_empty_annotations=True). " - "%d image(s) remain.", - dropped, - len(keep), - ) - self.annotations = [self.annotations[i] for i in keep] - self.img_files = [self.img_files[i] for i in keep] - self.label_files = [self.label_files[i] for i in keep] - if self.segments is not None: - self.segments = [self.segments[i] for i in keep] - self.num_imgs = len(self.annotations) - if self.num_imgs == 0: - raise ValueError("No labeled images remain after filtering empty annotations.") - def _load_annotations(self) -> List: """Load all annotations.""" total = len(self.img_files) diff --git a/libreyolo/training/trainer.py b/libreyolo/training/trainer.py index e44d5fd5..bc5a266d 100644 --- a/libreyolo/training/trainer.py +++ b/libreyolo/training/trainer.py @@ -386,7 +386,6 @@ def _setup_data(self): img_size=img_size, preproc=preproc, load_segments=load_segments, - filter_empty_annotations=load_segments, ) elif ann_file.exists(): train_dataset = COCODataset( @@ -419,7 +418,6 @@ def _setup_data(self): img_size=img_size, preproc=preproc, load_segments=load_segments, - filter_empty_annotations=load_segments, ) elif self.config.data_dir: data_dir = self.config.data_dir @@ -441,7 +439,6 @@ def _setup_data(self): img_size=img_size, preproc=preproc, load_segments=load_segments, - filter_empty_annotations=load_segments, ) else: raise ValueError("Either 'data' or 'data_dir' must be specified") diff --git a/tests/unit/test_dataset_loading.py b/tests/unit/test_dataset_loading.py index d95dd6f0..bc5657b7 100644 --- a/tests/unit/test_dataset_loading.py +++ b/tests/unit/test_dataset_loading.py @@ -1,6 +1,5 @@ """Tests for dataset annotation loading.""" -import logging import numpy as np import pytest from pathlib import Path @@ -102,71 +101,6 @@ def test_create_dataloader_drop_last_only_when_safe( assert len(loader) == expected_batches -def _build_mixed_dataset(tmp_path, label_contents, **kwargs): - """Create a YOLODataset from a list of label file bodies (empty string = unlabeled).""" - image_dir = tmp_path / "images" - label_dir = tmp_path / "labels" - image_dir.mkdir(exist_ok=True) - label_dir.mkdir(exist_ok=True) - - img_files, label_files = [], [] - for i, body in enumerate(label_contents): - img = image_dir / f"img_{i}.jpg" - lbl = label_dir / f"img_{i}.txt" - Image.new("RGB", (64, 64), color="white").save(img) - lbl.write_text(body) - img_files.append(img) - label_files.append(lbl) - - return YOLODataset(img_files=img_files, label_files=label_files, img_size=(64, 64), **kwargs) - - -def test_filter_empty_annotations_drops_unlabeled_images(tmp_path): - labeled = "0 0.5 0.5 0.25 0.5\n" - ds = _build_mixed_dataset(tmp_path, [labeled, "", labeled], filter_empty_annotations=True) - - assert ds.num_imgs == 2 - assert len(ds.annotations) == 2 - assert len(ds.img_files) == 2 - assert len(ds.label_files) == 2 - assert all(ann[0].shape[0] > 0 for ann in ds.annotations) - assert [f.name for f in ds.img_files] == ["img_0.jpg", "img_2.jpg"] - - -def test_filter_empty_annotations_logs_warning(tmp_path, caplog): - labeled = "0 0.5 0.5 0.25 0.5\n" - with caplog.at_level(logging.WARNING, logger="libreyolo.data.dataset"): - _build_mixed_dataset(tmp_path, [labeled, "", ""], filter_empty_annotations=True) - - warnings = [r for r in caplog.records if r.levelno == logging.WARNING] - assert len(warnings) == 1 - msg = warnings[0].getMessage() - assert "2" in msg and "1" in msg # 2 dropped, 1 remain - - -def test_filter_empty_annotations_raises_when_all_unlabeled(tmp_path): - with pytest.raises(ValueError, match="No labeled images remain"): - _build_mixed_dataset(tmp_path, ["", ""], filter_empty_annotations=True) - - -def test_filter_empty_annotations_false_keeps_unlabeled(tmp_path): - labeled = "0 0.5 0.5 0.25 0.5\n" - ds = _build_mixed_dataset(tmp_path, [labeled, ""], filter_empty_annotations=False) - - assert ds.num_imgs == 2 - - -def test_filter_empty_annotations_syncs_segments(tmp_path): - # Polygon label: class + 4 xy pairs (> 5 parts triggers segment parsing) - poly = "0 0.1 0.1 0.9 0.1 0.9 0.9 0.1 0.9\n" - ds = _build_mixed_dataset( - tmp_path, [poly, "", poly], filter_empty_annotations=True, load_segments=True - ) - - assert ds.num_imgs == 2 - assert len(ds.segments) == 2 - - def test_create_dataloader_uses_sampler_visible_size(): sampler = SubsetRandomSampler([0, 1]) loader = create_dataloader( From 6932f166723f0b91b6a627fa86661fd8f4c9e2f8 Mon Sep 17 00:00:00 2001 From: Xuban <59646791+EHxuban11@users.noreply.github.com> Date: Fri, 5 Jun 2026 11:48:32 +0200 Subject: [PATCH 4/4] test rfdetr no-match mask loss --- libreyolo/models/rfdetr/loss.py | 2 +- .../unit/test_rfdetr_seg_ddp_static_graph.py | 48 +++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/libreyolo/models/rfdetr/loss.py b/libreyolo/models/rfdetr/loss.py index 33daea21..8c4a70b2 100644 --- a/libreyolo/models/rfdetr/loss.py +++ b/libreyolo/models/rfdetr/loss.py @@ -400,7 +400,7 @@ def loss_masks(self, outputs, targets, indices, num_boxes): # torch.tensor([]) has no grad_fn and silently drops those params from # the backward graph, which violates DDP static_graph=True and causes # a "finished reduction" crash whenever a rank sees an all-unlabeled batch. - zero = spatial_features.sum() * 0.0 + query_features.sum() * 0.0 + bias * 0.0 + zero = spatial_features.sum() * 0.0 + query_features.sum() * 0.0 + bias.sum() * 0.0 return {"loss_mask_ce": zero, "loss_mask_dice": zero} else: batched_selected_masks = [] diff --git a/tests/unit/test_rfdetr_seg_ddp_static_graph.py b/tests/unit/test_rfdetr_seg_ddp_static_graph.py index 9d937900..1dc488ab 100644 --- a/tests/unit/test_rfdetr_seg_ddp_static_graph.py +++ b/tests/unit/test_rfdetr_seg_ddp_static_graph.py @@ -14,9 +14,11 @@ from __future__ import annotations import pytest +import torch pytestmark = pytest.mark.unit +rfdetr_loss = pytest.importorskip("libreyolo.models.rfdetr.loss") rfdetr_trainer = pytest.importorskip("libreyolo.models.rfdetr.trainer") @@ -44,3 +46,49 @@ def test_det_trainer_ddp_uses_find_unused_not_static_graph(): kwargs = trainer._ddp_kwargs() assert kwargs["find_unused_parameters"] is True assert kwargs["static_graph"] is False + + +def test_mask_loss_no_match_zero_stays_connected_to_mask_head_tensors(): + criterion = rfdetr_loss.SetCriterion( + num_classes=1, + matcher=None, + weight_dict={}, + focal_alpha=0.25, + losses=["masks"], + ) + + spatial_features = torch.randn(2, 4, 8, 8, requires_grad=True) + query_features = torch.randn(2, 5, 4, requires_grad=True) + bias = torch.randn(1, requires_grad=True) + outputs = { + "pred_masks": { + "spatial_features": spatial_features, + "query_features": query_features, + "bias": bias, + } + } + targets = [ + { + "labels": torch.zeros(0, dtype=torch.long), + "boxes": torch.zeros(0, 4), + "masks": torch.zeros(0, 8, 8, dtype=torch.bool), + } + for _ in range(2) + ] + indices = [ + (torch.empty(0, dtype=torch.long), torch.empty(0, dtype=torch.long)) + for _ in targets + ] + + losses = criterion.loss_masks(outputs, targets, indices, num_boxes=1.0) + loss = losses["loss_mask_ce"] + losses["loss_mask_dice"] + loss.backward() + + assert loss.ndim == 0 + assert loss.item() == 0.0 + assert spatial_features.grad is not None + assert query_features.grad is not None + assert bias.grad is not None + assert spatial_features.grad.abs().sum().item() == 0.0 + assert query_features.grad.abs().sum().item() == 0.0 + assert bias.grad.abs().sum().item() == 0.0