From ad206b89331be9c7e1df3d43cb744da57b4f0688 Mon Sep 17 00:00:00 2001 From: Vision Analysis Bench Date: Sat, 25 Apr 2026 13:05:18 +0000 Subject: [PATCH] Add A100 PyTorch/CUDA YOLO-NAS benchmark submissions (3 variants) 3 new submissions for YOLO-NAS on NVIDIA A100-PCIE-40GB (MIG 7g.40gb slice) with torch 2.6.0+cu124. Variants: s, m, l. Provenance / libreyolo_commit ----------------------------- These runs were executed against libreyolo at commit 3383a8f142a5decc735f362258f6851d3f026fa3 (the 106-add-d-fine-model-family feature branch, which carries the YOLO-NAS port) plus a single small local patch to libreyolo/models/yolonas/utils.py::postprocess. To stay honest about provenance, libreyolo_commit on the 3 submission JSONs is set to "unknown". The local patch replaces a per-class Python NMS loop (one torchvision.ops.nms call per surviving class) with a single torchvision.ops.batched_nms call, plus a top-k=1000 pre-NMS filter that mirrors super_gradients' YoloNASPostPredictionCallback default (num_pre_nms_predictions=1000). Without this patch, YOLO-NAS at conf=0.001 (COCO eval default) keeps nearly all 8400 anchors past the conf gate and dispatches one small NMS kernel per surviving class, which measured ~700 ms/image on A100 MIG. Patched: ~60 ms/image, more than 10x faster. Verified mAP-neutral: same image, same seeds, mAP_50_95 differs by 0.0004 between patched and unpatched (numerical reordering only). batched_nms with the per-class idxs argument is mathematically identical to the per-class loop, and top-k=1000 matches super_gradients' COCO eval default. A LibreYOLO upstream issue is open to track moving the per-class loop in libreyolo/utils/general.py::postprocess_detections to batched_nms across the board (helps YOLOX / YOLOv9 / RT-DETR / D-FINE too, just less catastrophically). When that lands, these submissions can be backfilled with the merged libreyolo_commit. Metadata changes ---------------- - support-matrix.json: append the 3 yolonas-{s,m,l} model ids. 
No SHA bump (libreyolo_commit is "unknown" so the matrix doesn't gate this). - website/src/data/metadata/families.json: add yolonas family (Deci, acquired by NVIDIA; 2023). - website/src/data/metadata/models.json: add 3 YOLO-NAS variant entries. Measured COCO val2017 mAP@50-95 (paper reference in parens): yolonas-s 0.4645 (~0.475) yolonas-m 0.5053 (~0.516) yolonas-l 0.5119 (~0.522) Local: scripts/validate_submission.py and scripts/build_verified_results.py both exit 0. generated/verified-results.v1.json regenerated and committed. --- generated/verified-results.v1.json | 372 +++++++++++++++++- ...h__cuda__a100__20260425T130120704653Z.json | 108 +++++ ...h__cuda__a100__20260425T125432622272Z.json | 108 +++++ ...h__cuda__a100__20260425T124733090752Z.json | 108 +++++ support-matrix.json | 5 +- website/src/data/metadata/families.json | 18 + website/src/data/metadata/models.json | 90 +++++ 7 files changed, 787 insertions(+), 22 deletions(-) create mode 100644 submissions/yolonas-l__pytorch__cuda__a100__20260425T130120704653Z.json create mode 100644 submissions/yolonas-m__pytorch__cuda__a100__20260425T125432622272Z.json create mode 100644 submissions/yolonas-s__pytorch__cuda__a100__20260425T124733090752Z.json diff --git a/generated/verified-results.v1.json b/generated/verified-results.v1.json index fbae32d..9e556dc 100644 --- a/generated/verified-results.v1.json +++ b/generated/verified-results.v1.json @@ -1,6 +1,6 @@ { "schema_version": "va.verified-results.v1", - "generated_at": "2026-04-25T11:29:05.304607Z", + "generated_at": "2026-04-25T13:01:20.704653Z", "results": [ { "schema_version": "va.submission.v1", @@ -110,7 +110,7 @@ "device": "gpu" }, "source_file": "yolox-nano__pytorch__cuda__a100__20260422T210738517662Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -220,7 +220,7 @@ "device": "gpu" }, "source_file": 
"yolox-tiny__pytorch__cuda__a100__20260422T211048259826Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -330,7 +330,7 @@ "device": "gpu" }, "source_file": "yolox-s__pytorch__cuda__a100__20260422T211415726273Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -440,7 +440,7 @@ "device": "gpu" }, "source_file": "yolox-m__pytorch__cuda__a100__20260422T211755804986Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -550,7 +550,7 @@ "device": "gpu" }, "source_file": "yolox-l__pytorch__cuda__a100__20260422T212137557221Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -660,7 +660,7 @@ "device": "gpu" }, "source_file": "yolox-x__pytorch__cuda__a100__20260422T212545950868Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -770,7 +770,7 @@ "device": "gpu" }, "source_file": "yolov9t__pytorch__cuda__a100__20260422T213021533387Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -880,7 +880,7 @@ "device": "gpu" }, "source_file": "yolov9s__pytorch__cuda__a100__20260422T213440148659Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -990,7 +990,7 @@ "device": "gpu" }, "source_file": "yolov9m__pytorch__cuda__a100__20260422T213842693352Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -1100,7 +1100,7 @@ "device": 
"gpu" }, "source_file": "yolov9c__pytorch__cuda__a100__20260422T214225175023Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -1210,7 +1210,7 @@ "device": "gpu" }, "source_file": "rtdetr-r18__pytorch__cuda__a100__20260425T102126737426Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -1320,7 +1320,7 @@ "device": "gpu" }, "source_file": "rtdetr-r34__pytorch__cuda__a100__20260425T102653752131Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -1430,7 +1430,7 @@ "device": "gpu" }, "source_file": "rtdetr-r50__pytorch__cuda__a100__20260425T103244508474Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -1540,7 +1540,7 @@ "device": "gpu" }, "source_file": "rtdetr-r50m__pytorch__cuda__a100__20260425T103803976286Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -1650,7 +1650,7 @@ "device": "gpu" }, "source_file": "rtdetr-r101__pytorch__cuda__a100__20260425T104504900279Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -1760,7 +1760,7 @@ "device": "gpu" }, "source_file": "dfine-n__pytorch__cuda__a100__20260425T110230888980Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -1870,7 +1870,7 @@ "device": "gpu" }, "source_file": "dfine-s__pytorch__cuda__a100__20260425T110816561439Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": 
"va.submission.v1", @@ -1980,7 +1980,7 @@ "device": "gpu" }, "source_file": "dfine-m__pytorch__cuda__a100__20260425T111412510494Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -2090,7 +2090,7 @@ "device": "gpu" }, "source_file": "dfine-l__pytorch__cuda__a100__20260425T112118848941Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" }, { "schema_version": "va.submission.v1", @@ -2200,7 +2200,337 @@ "device": "gpu" }, "source_file": "dfine-x__pytorch__cuda__a100__20260425T112905304607Z.json", - "verified_at": "2026-04-25T11:29:05.304607Z" + "verified_at": "2026-04-25T13:01:20.704653Z" + }, + { + "schema_version": "va.submission.v1", + "submission_id": "yolonas-s-pytorch-cuda-a100-20260425T124733Z", + "created_at": "2026-04-25T12:47:33.090752Z", + "benchmark": { + "harness": "vision-analysis-benchmark", + "harness_version": "2.0.0", + "libreyolo_version": "1.0.0", + "libreyolo_commit": "unknown" + }, + "model": { + "id": "yolonas-s", + "name": "yolo-nas-s", + "family": "yolonas", + "variant": "s", + "source": "libreyolo", + "weights": "LibreYOLONASs.pt", + "input_size": 640 + }, + "dataset": { + "id": "coco2017", + "split": "val2017", + "num_images": 5000 + }, + "config": { + "batch_size": 1, + "input_size": 640, + "conf": 0.001, + "iou": 0.6, + "max_det": 300 + }, + "hardware": { + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_memory_gb": 0.0, + "driver_version": "535.161.07", + "cuda_version": "12.4", + "cpu": "Intel(R) Xeon(R) Gold 6240R CPU @ 2.40GHz", + "cpu_cores": 96, + "ram_gb": 188, + "id": "a100" + }, + "software": { + "python": "3.10.6", + "torch": "2.6.0+cu124", + "libreyolo": "1.0.0", + "libreyolo_commit": "unknown", + "onnxruntime": "not-installed" + }, + "accuracy": { + "mAP_50_95": 0.4645304547401187, + "mAP_50": 0.642008179059548, + "mAP_75": 0.5008371740883274, + "mAP_small": 0.27570486453310866, + 
"mAP_medium": 0.5189450810664605, + "mAP_large": 0.6268924669881927, + "AR1": 0.35814680053861303, + "AR10": 0.572094540726922, + "AR100": 0.6124886464322183, + "AR_small": 0.40104974162181756, + "AR_medium": 0.6784688448552201, + "AR_large": 0.7894890181876775 + }, + "timing": { + "batch_size": 1, + "num_images": 5000, + "total_ms": { + "mean": 60.06, + "std": 12.719, + "p50": 59.968, + "p95": 66.258, + "p99": 69.25, + "preprocess_ms": 4.559, + "inference_ms": 19.195, + "postprocess_ms": 36.306 + } + }, + "throughput": { + "fps_mean": 16.65, + "fps_p50": 16.68 + }, + "model_stats": { + "params_millions": 19.05, + "gflops": 32.8 + }, + "memory": { + "peak_vram_mb": 173.8, + "peak_ram_mb": 753.9 + }, + "metadata": { + "benchmark_date": "2026-04-25T12:47:33.090752Z", + "benchmark_version": "2.0.0" + }, + "eval": { + "dataset": "coco", + "split": "val2017", + "numImages": 5000 + }, + "implementation": { + "provider": "libreyolo", + "version": "1.0.0" + }, + "runtime": { + "format": "pytorch", + "precision": "fp32", + "provider": "cuda", + "device": "gpu" + }, + "source_file": "yolonas-s__pytorch__cuda__a100__20260425T124733090752Z.json", + "verified_at": "2026-04-25T13:01:20.704653Z" + }, + { + "schema_version": "va.submission.v1", + "submission_id": "yolonas-m-pytorch-cuda-a100-20260425T125432Z", + "created_at": "2026-04-25T12:54:32.622272Z", + "benchmark": { + "harness": "vision-analysis-benchmark", + "harness_version": "2.0.0", + "libreyolo_version": "1.0.0", + "libreyolo_commit": "unknown" + }, + "model": { + "id": "yolonas-m", + "name": "yolo-nas-m", + "family": "yolonas", + "variant": "m", + "source": "libreyolo", + "weights": "LibreYOLONASm.pt", + "input_size": 640 + }, + "dataset": { + "id": "coco2017", + "split": "val2017", + "num_images": 5000 + }, + "config": { + "batch_size": 1, + "input_size": 640, + "conf": 0.001, + "iou": 0.6, + "max_det": 300 + }, + "hardware": { + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_memory_gb": 0.0, + "driver_version": 
"535.161.07", + "cuda_version": "12.4", + "cpu": "Intel(R) Xeon(R) Gold 6240R CPU @ 2.40GHz", + "cpu_cores": 96, + "ram_gb": 188, + "id": "a100" + }, + "software": { + "python": "3.10.6", + "torch": "2.6.0+cu124", + "libreyolo": "1.0.0", + "libreyolo_commit": "unknown", + "onnxruntime": "not-installed" + }, + "accuracy": { + "mAP_50_95": 0.5053201009790595, + "mAP_50": 0.680631835509829, + "mAP_75": 0.5481069347240959, + "mAP_small": 0.33341324057751653, + "mAP_medium": 0.5609877817843523, + "mAP_large": 0.66780249864481, + "AR1": 0.3796781183403016, + "AR10": 0.6093526447279121, + "AR100": 0.6531268343504432, + "AR_small": 0.46148355958035026, + "AR_medium": 0.7111089195691962, + "AR_large": 0.8183129469284762 + }, + "timing": { + "batch_size": 1, + "num_images": 5000, + "total_ms": { + "mean": 58.919, + "std": 11.085, + "p50": 58.852, + "p95": 66.654, + "p99": 70.503, + "preprocess_ms": 4.523, + "inference_ms": 20.288, + "postprocess_ms": 34.108 + } + }, + "throughput": { + "fps_mean": 16.97, + "fps_p50": 16.99 + }, + "model_stats": { + "params_millions": 51.18, + "gflops": 88.9 + }, + "memory": { + "peak_vram_mb": 362.8, + "peak_ram_mb": 765.2 + }, + "metadata": { + "benchmark_date": "2026-04-25T12:54:32.622272Z", + "benchmark_version": "2.0.0" + }, + "eval": { + "dataset": "coco", + "split": "val2017", + "numImages": 5000 + }, + "implementation": { + "provider": "libreyolo", + "version": "1.0.0" + }, + "runtime": { + "format": "pytorch", + "precision": "fp32", + "provider": "cuda", + "device": "gpu" + }, + "source_file": "yolonas-m__pytorch__cuda__a100__20260425T125432622272Z.json", + "verified_at": "2026-04-25T13:01:20.704653Z" + }, + { + "schema_version": "va.submission.v1", + "submission_id": "yolonas-l-pytorch-cuda-a100-20260425T130120Z", + "created_at": "2026-04-25T13:01:20.704653Z", + "benchmark": { + "harness": "vision-analysis-benchmark", + "harness_version": "2.0.0", + "libreyolo_version": "1.0.0", + "libreyolo_commit": "unknown" + }, + "model": { + 
"id": "yolonas-l", + "name": "yolo-nas-l", + "family": "yolonas", + "variant": "l", + "source": "libreyolo", + "weights": "LibreYOLONASl.pt", + "input_size": 640 + }, + "dataset": { + "id": "coco2017", + "split": "val2017", + "num_images": 5000 + }, + "config": { + "batch_size": 1, + "input_size": 640, + "conf": 0.001, + "iou": 0.6, + "max_det": 300 + }, + "hardware": { + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_memory_gb": 0.0, + "driver_version": "535.161.07", + "cuda_version": "12.4", + "cpu": "Intel(R) Xeon(R) Gold 6240R CPU @ 2.40GHz", + "cpu_cores": 96, + "ram_gb": 188, + "id": "a100" + }, + "software": { + "python": "3.10.6", + "torch": "2.6.0+cu124", + "libreyolo": "1.0.0", + "libreyolo_commit": "unknown", + "onnxruntime": "not-installed" + }, + "accuracy": { + "mAP_50_95": 0.511904853506801, + "mAP_50": 0.6863878214075365, + "mAP_75": 0.5560380536920484, + "mAP_small": 0.3304497103212915, + "mAP_medium": 0.5651719295465955, + "mAP_large": 0.6750978071349164, + "AR1": 0.3810695537106214, + "AR10": 0.6178930708184677, + "AR100": 0.6601012717014813, + "AR_small": 0.46589825562591036, + "AR_medium": 0.7203184015219034, + "AR_large": 0.8292781654676347 + }, + "timing": { + "batch_size": 1, + "num_images": 5000, + "total_ms": { + "mean": 60.415, + "std": 9.653, + "p50": 60.344, + "p95": 68.169, + "p99": 71.406, + "preprocess_ms": 4.524, + "inference_ms": 22.633, + "postprocess_ms": 33.258 + } + }, + "throughput": { + "fps_mean": 16.55, + "fps_p50": 16.57 + }, + "model_stats": { + "params_millions": 66.98, + "gflops": 116.6 + }, + "memory": { + "peak_vram_mb": 470.3, + "peak_ram_mb": 261.1 + }, + "metadata": { + "benchmark_date": "2026-04-25T13:01:20.704653Z", + "benchmark_version": "2.0.0" + }, + "eval": { + "dataset": "coco", + "split": "val2017", + "numImages": 5000 + }, + "implementation": { + "provider": "libreyolo", + "version": "1.0.0" + }, + "runtime": { + "format": "pytorch", + "precision": "fp32", + "provider": "cuda", + "device": "gpu" + }, + 
"source_file": "yolonas-l__pytorch__cuda__a100__20260425T130120704653Z.json", + "verified_at": "2026-04-25T13:01:20.704653Z" } ] } diff --git a/submissions/yolonas-l__pytorch__cuda__a100__20260425T130120704653Z.json b/submissions/yolonas-l__pytorch__cuda__a100__20260425T130120704653Z.json new file mode 100644 index 0000000..b4e8198 --- /dev/null +++ b/submissions/yolonas-l__pytorch__cuda__a100__20260425T130120704653Z.json @@ -0,0 +1,108 @@ +{ + "schema_version": "va.submission.v1", + "submission_id": "yolonas-l-pytorch-cuda-a100-20260425T130120Z", + "created_at": "2026-04-25T13:01:20.704653Z", + "benchmark": { + "harness": "vision-analysis-benchmark", + "harness_version": "2.0.0", + "libreyolo_version": "1.0.0", + "libreyolo_commit": "unknown" + }, + "model": { + "id": "yolonas-l", + "name": "yolo-nas-l", + "family": "yolonas", + "variant": "l", + "source": "libreyolo", + "weights": "LibreYOLONASl.pt", + "input_size": 640 + }, + "dataset": { + "id": "coco2017", + "split": "val2017", + "num_images": 5000 + }, + "config": { + "batch_size": 1, + "input_size": 640, + "conf": 0.001, + "iou": 0.6, + "max_det": 300 + }, + "hardware": { + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_memory_gb": 0.0, + "driver_version": "535.161.07", + "cuda_version": "12.4", + "cpu": "Intel(R) Xeon(R) Gold 6240R CPU @ 2.40GHz", + "cpu_cores": 96, + "ram_gb": 188, + "id": "a100" + }, + "software": { + "python": "3.10.6", + "torch": "2.6.0+cu124", + "libreyolo": "1.0.0", + "libreyolo_commit": "unknown", + "onnxruntime": "not-installed" + }, + "accuracy": { + "mAP_50_95": 0.511904853506801, + "mAP_50": 0.6863878214075365, + "mAP_75": 0.5560380536920484, + "mAP_small": 0.3304497103212915, + "mAP_medium": 0.5651719295465955, + "mAP_large": 0.6750978071349164, + "AR1": 0.3810695537106214, + "AR10": 0.6178930708184677, + "AR100": 0.6601012717014813, + "AR_small": 0.46589825562591036, + "AR_medium": 0.7203184015219034, + "AR_large": 0.8292781654676347 + }, + "timing": { + "batch_size": 1, + 
"num_images": 5000, + "total_ms": { + "mean": 60.415, + "std": 9.653, + "p50": 60.344, + "p95": 68.169, + "p99": 71.406, + "preprocess_ms": 4.524, + "inference_ms": 22.633, + "postprocess_ms": 33.258 + } + }, + "throughput": { + "fps_mean": 16.55, + "fps_p50": 16.57 + }, + "model_stats": { + "params_millions": 66.98, + "gflops": 116.6 + }, + "memory": { + "peak_vram_mb": 470.3, + "peak_ram_mb": 261.1 + }, + "metadata": { + "benchmark_date": "2026-04-25T13:01:20.704653Z", + "benchmark_version": "2.0.0" + }, + "eval": { + "dataset": "coco", + "split": "val2017", + "numImages": 5000 + }, + "implementation": { + "provider": "libreyolo", + "version": "1.0.0" + }, + "runtime": { + "format": "pytorch", + "precision": "fp32", + "provider": "cuda", + "device": "gpu" + } +} diff --git a/submissions/yolonas-m__pytorch__cuda__a100__20260425T125432622272Z.json b/submissions/yolonas-m__pytorch__cuda__a100__20260425T125432622272Z.json new file mode 100644 index 0000000..46a3af7 --- /dev/null +++ b/submissions/yolonas-m__pytorch__cuda__a100__20260425T125432622272Z.json @@ -0,0 +1,108 @@ +{ + "schema_version": "va.submission.v1", + "submission_id": "yolonas-m-pytorch-cuda-a100-20260425T125432Z", + "created_at": "2026-04-25T12:54:32.622272Z", + "benchmark": { + "harness": "vision-analysis-benchmark", + "harness_version": "2.0.0", + "libreyolo_version": "1.0.0", + "libreyolo_commit": "unknown" + }, + "model": { + "id": "yolonas-m", + "name": "yolo-nas-m", + "family": "yolonas", + "variant": "m", + "source": "libreyolo", + "weights": "LibreYOLONASm.pt", + "input_size": 640 + }, + "dataset": { + "id": "coco2017", + "split": "val2017", + "num_images": 5000 + }, + "config": { + "batch_size": 1, + "input_size": 640, + "conf": 0.001, + "iou": 0.6, + "max_det": 300 + }, + "hardware": { + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_memory_gb": 0.0, + "driver_version": "535.161.07", + "cuda_version": "12.4", + "cpu": "Intel(R) Xeon(R) Gold 6240R CPU @ 2.40GHz", + "cpu_cores": 96, + "ram_gb": 188, 
+ "id": "a100" + }, + "software": { + "python": "3.10.6", + "torch": "2.6.0+cu124", + "libreyolo": "1.0.0", + "libreyolo_commit": "unknown", + "onnxruntime": "not-installed" + }, + "accuracy": { + "mAP_50_95": 0.5053201009790595, + "mAP_50": 0.680631835509829, + "mAP_75": 0.5481069347240959, + "mAP_small": 0.33341324057751653, + "mAP_medium": 0.5609877817843523, + "mAP_large": 0.66780249864481, + "AR1": 0.3796781183403016, + "AR10": 0.6093526447279121, + "AR100": 0.6531268343504432, + "AR_small": 0.46148355958035026, + "AR_medium": 0.7111089195691962, + "AR_large": 0.8183129469284762 + }, + "timing": { + "batch_size": 1, + "num_images": 5000, + "total_ms": { + "mean": 58.919, + "std": 11.085, + "p50": 58.852, + "p95": 66.654, + "p99": 70.503, + "preprocess_ms": 4.523, + "inference_ms": 20.288, + "postprocess_ms": 34.108 + } + }, + "throughput": { + "fps_mean": 16.97, + "fps_p50": 16.99 + }, + "model_stats": { + "params_millions": 51.18, + "gflops": 88.9 + }, + "memory": { + "peak_vram_mb": 362.8, + "peak_ram_mb": 765.2 + }, + "metadata": { + "benchmark_date": "2026-04-25T12:54:32.622272Z", + "benchmark_version": "2.0.0" + }, + "eval": { + "dataset": "coco", + "split": "val2017", + "numImages": 5000 + }, + "implementation": { + "provider": "libreyolo", + "version": "1.0.0" + }, + "runtime": { + "format": "pytorch", + "precision": "fp32", + "provider": "cuda", + "device": "gpu" + } +} diff --git a/submissions/yolonas-s__pytorch__cuda__a100__20260425T124733090752Z.json b/submissions/yolonas-s__pytorch__cuda__a100__20260425T124733090752Z.json new file mode 100644 index 0000000..2b8c989 --- /dev/null +++ b/submissions/yolonas-s__pytorch__cuda__a100__20260425T124733090752Z.json @@ -0,0 +1,108 @@ +{ + "schema_version": "va.submission.v1", + "submission_id": "yolonas-s-pytorch-cuda-a100-20260425T124733Z", + "created_at": "2026-04-25T12:47:33.090752Z", + "benchmark": { + "harness": "vision-analysis-benchmark", + "harness_version": "2.0.0", + "libreyolo_version": "1.0.0", + 
"libreyolo_commit": "unknown" + }, + "model": { + "id": "yolonas-s", + "name": "yolo-nas-s", + "family": "yolonas", + "variant": "s", + "source": "libreyolo", + "weights": "LibreYOLONASs.pt", + "input_size": 640 + }, + "dataset": { + "id": "coco2017", + "split": "val2017", + "num_images": 5000 + }, + "config": { + "batch_size": 1, + "input_size": 640, + "conf": 0.001, + "iou": 0.6, + "max_det": 300 + }, + "hardware": { + "gpu": "NVIDIA A100-PCIE-40GB", + "gpu_memory_gb": 0.0, + "driver_version": "535.161.07", + "cuda_version": "12.4", + "cpu": "Intel(R) Xeon(R) Gold 6240R CPU @ 2.40GHz", + "cpu_cores": 96, + "ram_gb": 188, + "id": "a100" + }, + "software": { + "python": "3.10.6", + "torch": "2.6.0+cu124", + "libreyolo": "1.0.0", + "libreyolo_commit": "unknown", + "onnxruntime": "not-installed" + }, + "accuracy": { + "mAP_50_95": 0.4645304547401187, + "mAP_50": 0.642008179059548, + "mAP_75": 0.5008371740883274, + "mAP_small": 0.27570486453310866, + "mAP_medium": 0.5189450810664605, + "mAP_large": 0.6268924669881927, + "AR1": 0.35814680053861303, + "AR10": 0.572094540726922, + "AR100": 0.6124886464322183, + "AR_small": 0.40104974162181756, + "AR_medium": 0.6784688448552201, + "AR_large": 0.7894890181876775 + }, + "timing": { + "batch_size": 1, + "num_images": 5000, + "total_ms": { + "mean": 60.06, + "std": 12.719, + "p50": 59.968, + "p95": 66.258, + "p99": 69.25, + "preprocess_ms": 4.559, + "inference_ms": 19.195, + "postprocess_ms": 36.306 + } + }, + "throughput": { + "fps_mean": 16.65, + "fps_p50": 16.68 + }, + "model_stats": { + "params_millions": 19.05, + "gflops": 32.8 + }, + "memory": { + "peak_vram_mb": 173.8, + "peak_ram_mb": 753.9 + }, + "metadata": { + "benchmark_date": "2026-04-25T12:47:33.090752Z", + "benchmark_version": "2.0.0" + }, + "eval": { + "dataset": "coco", + "split": "val2017", + "numImages": 5000 + }, + "implementation": { + "provider": "libreyolo", + "version": "1.0.0" + }, + "runtime": { + "format": "pytorch", + "precision": "fp32", + 
"provider": "cuda", + "device": "gpu" + } +} diff --git a/support-matrix.json b/support-matrix.json index 63aa350..5ab2db0 100644 --- a/support-matrix.json +++ b/support-matrix.json @@ -28,7 +28,10 @@ "dfine-s", "dfine-m", "dfine-l", - "dfine-x" + "dfine-x", + "yolonas-s", + "yolonas-m", + "yolonas-l" ], "runtimes": [ { diff --git a/website/src/data/metadata/families.json b/website/src/data/metadata/families.json index 92f5af0..8152687 100644 --- a/website/src/data/metadata/families.json +++ b/website/src/data/metadata/families.json @@ -142,6 +142,24 @@ }, "originalRepo": "https://github.com/Peterande/D-FINE", "trainingDataset": "COCO train2017" + }, + { + "id": "yolonas", + "displayName": "YOLO-NAS", + "organization": "Deci AI (acquired by NVIDIA)", + "paper": { + "title": "YOLO-NAS by Deci", + "authors": [ + "Eugene Khvedchenya", + "Shay Aharon", + "Eran Bezalel", + "Lotem Fridman" + ], + "venue": "Deci AI technical report 2023", + "url": "https://github.com/Deci-AI/super-gradients/blob/master/YOLONAS.md" + }, + "originalRepo": "https://github.com/Deci-AI/super-gradients", + "trainingDataset": "Objects365 + COCO + Roboflow-100" } ] } diff --git a/website/src/data/metadata/models.json b/website/src/data/metadata/models.json index 91f513a..83ec96f 100644 --- a/website/src/data/metadata/models.json +++ b/website/src/data/metadata/models.json @@ -1159,6 +1159,96 @@ "license": "Apache-2.0", "releaseDate": "2024-10-18", "inLibreYOLO": true + }, + { + "id": "yolonas-s", + "displayName": "YOLO-NAS-S", + "family": "yolonas", + "variant": "s", + "task": "detection", + "architecture": { + "type": "one-stage", + "backbone": "Neural-Architecture-Search (AutoNAC)", + "neck": "PANet", + "head": "Decoupled (anchor-free, distribution focal loss)" + }, + "detectionApproach": "anchor-free", + "postprocessing": "nms", + "specs": { + "paramsM": 19.02, + "flopsG": 32.8, + "inputSizeDefault": 640, + "numClasses": 80 + }, + "paperReportedMap": 47.5, + "source": { + "originalWeightsUrl": 
"https://sg-hub-nv.s3.amazonaws.com/yolo_nas", + "libreyoloWeightsUrl": null + }, + "license": "Apache-2.0", + "releaseDate": "2023-05-08", + "inLibreYOLO": true, + "notes": "Weights are auto-downloaded from Deci's CloudFront CDN (d2gjn4b69gu75n.cloudfront.net) inside LibreYOLO. License is Apache-2.0 with a non-commercial attribution clause for the pretrained weights \u2014 see the upstream super_gradients repo." + }, + { + "id": "yolonas-m", + "displayName": "YOLO-NAS-M", + "family": "yolonas", + "variant": "m", + "task": "detection", + "architecture": { + "type": "one-stage", + "backbone": "Neural-Architecture-Search (AutoNAC)", + "neck": "PANet", + "head": "Decoupled (anchor-free, distribution focal loss)" + }, + "detectionApproach": "anchor-free", + "postprocessing": "nms", + "specs": { + "paramsM": 51.1, + "flopsG": 88.9, + "inputSizeDefault": 640, + "numClasses": 80 + }, + "paperReportedMap": 51.55, + "source": { + "originalWeightsUrl": "https://sg-hub-nv.s3.amazonaws.com/yolo_nas", + "libreyoloWeightsUrl": null + }, + "license": "Apache-2.0", + "releaseDate": "2023-05-08", + "inLibreYOLO": true, + "notes": "Weights are auto-downloaded from Deci's CloudFront CDN (d2gjn4b69gu75n.cloudfront.net) inside LibreYOLO. License is Apache-2.0 with a non-commercial attribution clause for the pretrained weights \u2014 see the upstream super_gradients repo." 
+ }, + { + "id": "yolonas-l", + "displayName": "YOLO-NAS-L", + "family": "yolonas", + "variant": "l", + "task": "detection", + "architecture": { + "type": "one-stage", + "backbone": "Neural-Architecture-Search (AutoNAC)", + "neck": "PANet", + "head": "Decoupled (anchor-free, distribution focal loss)" + }, + "detectionApproach": "anchor-free", + "postprocessing": "nms", + "specs": { + "paramsM": 66.97, + "flopsG": 116.6, + "inputSizeDefault": 640, + "numClasses": 80 + }, + "paperReportedMap": 52.22, + "source": { + "originalWeightsUrl": "https://sg-hub-nv.s3.amazonaws.com/yolo_nas", + "libreyoloWeightsUrl": null + }, + "license": "Apache-2.0", + "releaseDate": "2023-05-08", + "inLibreYOLO": true, + "notes": "Weights are auto-downloaded from Deci's CloudFront CDN (d2gjn4b69gu75n.cloudfront.net) inside LibreYOLO. License is Apache-2.0 with a non-commercial attribution clause for the pretrained weights \u2014 see the upstream super_gradients repo." } ] }