From 1cec7dd4a8f21673dce9ada28ac0d5736c4f2d04 Mon Sep 17 00:00:00 2001
From: Maxim Stykow <maxim.stykow@gmail.com>
Date: Thu, 23 Apr 2026 00:38:21 +0200
Subject: [PATCH 1/3] fix(copyright): capture comment attribution authors

Improve shared author extraction for comment-style attribution lines so compare-outputs can keep Bower verification repos aligned without target-specific handling.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
Signed-off-by: Maxim Stykow <maxim.stykow@gmail.com>
---
 src/scanner/process/copyright.rs      | 70 ++++++++++++++++++++++++++-
 src/scanner/process/copyright_test.rs | 21 ++++++++
 2 files changed, 89 insertions(+), 2 deletions(-)
diff --git a/src/scanner/process/copyright.rs b/src/scanner/process/copyright.rs
index 74ce6b083..6d447734e 100644
--- a/src/scanner/process/copyright.rs
+++ b/src/scanner/process/copyright.rs
@@ -228,6 +228,12 @@ fn extract_comment_author_supplements(text_content: &str) -> Vec<AuthorDetection
         )
         .expect("valid comment author regex")
     });
+    static COMMENT_PAREN_CONTACT_AUTHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
+        Regex::new(
+            r"(?i)\b(?:written|edited|modified|updated|originally)\s+by\s+(?P<name>[^()\n]+?)\s*\(\s*(?P<contact>(?:[^)\s]+@[^)\s]+|https?://[^)\s]+))\s*\)\s*\.?$|^(?:[#;/*!\-\s]+)?(?:[^()\n]*?\bby\s+(?P<name2>[^()\n]+?)\s*\(\s*(?P<contact2>(?:[^)\s]+@[^)\s]+|https?://[^)\s]+))\s*\))\s*\.?$",
+        )
+        .expect("valid parenthesized contact author regex")
+    });
     static DOCKER_MAINTAINER_LABEL_RE: LazyLock<Regex> = LazyLock::new(|| {
         Regex::new(r#"(?i)^label\s+maintainer\s*=\s*[\"']?(?P<author>[^\"'\n]+<[^>]+>)[\"']?\s*$"#)
             .expect("valid docker maintainer label regex")
@@ -241,21 +247,41 @@ fn extract_comment_author_supplements(text_content: &str) -> Vec<AuthorDetection
 
     for (line_index, line) in text_content.lines().enumerate() {
         let trimmed = line.trim();
+        let normalized = normalize_comment_author_line(trimmed);
         let line_number = LineNumber::from_0_indexed(line_index);
 
-        if let Some(captures) = COMMENT_AUTHOR_RE.captures(trimmed)
+        if let Some(captures) = COMMENT_AUTHOR_RE.captures(&normalized)
             && let Some(author) = captures
                 .name("author")
                 .or_else(|| captures.name("author2"))
                 .map(|m| m.as_str().trim())
         {
             authors.push(AuthorDetection {
-                author: author.to_string(),
+                author: normalize_comment_author_candidate(author),
                 start_line: line_number,
                 end_line: line_number,
             });
         }
 
+        if let Some(captures) = COMMENT_PAREN_CONTACT_AUTHOR_RE.captures(&normalized) {
+            let name = captures
+                .name("name")
+                .or_else(|| captures.name("name2"))
+                .map(|m| m.as_str().trim());
+            let contact = captures
+                .name("contact")
+                .or_else(|| captures.name("contact2"))
+                .map(|m| m.as_str().trim());
+
+            if let (Some(name), Some(contact)) = (name, contact) {
+                authors.push(AuthorDetection {
+                    author: normalize_parenthesized_contact_author(name, contact),
+                    start_line: line_number,
+                    end_line: line_number,
+                });
+            }
+        }
+
         if let Some(captures) = DOCKER_MAINTAINER_LABEL_RE.captures(trimmed)
             && let Some(author) = captures.name("author").map(|m| m.as_str().trim())
         {
@@ -287,6 +313,62 @@ fn extract_comment_author_supplements(text_content: &str) -> Vec<AuthorDetection
     authors
 }
 
+/// Strips trailing comment closers from a line before author matching.
+///
+/// The line is trimmed, any trailing `*/` and then any trailing `-->` are
+/// removed, and the result is trimmed again, so end-anchored patterns such as
+/// `COMMENT_PAREN_CONTACT_AUTHOR_RE` see the attribution as the end of the line.
+fn normalize_comment_author_line(line: &str) -> String {
+    line.trim()
+        .trim_end_matches("*/")
+        .trim_end_matches("-->")
+        .trim()
+        .to_string()
+}
+
+/// Cleans up an author string captured from a comment attribution line.
+///
+/// Trailing periods and surrounding whitespace are dropped. A `Name <http(s)://url>`
+/// candidate is flattened to `Name url` without the URL's trailing slashes; any
+/// other candidate (for example `Name <email>`) is returned otherwise unchanged.
+fn normalize_comment_author_candidate(author: &str) -> String {
+    static ANGLE_URL_AUTHOR_RE: LazyLock<Regex> = LazyLock::new(|| {
+        Regex::new(r"^(?P<name>[^<>]+?)\s*<\s*(?P<url>https?://[^>\s]+)\s*>\s*$")
+            .expect("valid angle url author regex")
+    });
+
+    let trimmed = author.trim().trim_end_matches('.').trim();
+    let Some(captures) = ANGLE_URL_AUTHOR_RE.captures(trimmed) else {
+        return trimmed.to_string();
+    };
+
+    // Both groups are mandatory in the pattern, so indexing cannot panic on a match.
+    let name = captures["name"].trim();
+    let url = captures["url"].trim_end_matches('/');
+    format!("{name} {url}")
+}
+
+/// Formats a `name (contact)` author from a parenthesized e-mail or URL contact.
+///
+/// The name loses trailing periods and surrounding whitespace. URL contacts lose
+/// their trailing slashes; the scheme check ignores ASCII case because the
+/// capturing regex is case-insensitive and can hand over e.g. `HTTP://...`.
+fn normalize_parenthesized_contact_author(name: &str, contact: &str) -> String {
+    let normalized_name = name.trim().trim_end_matches('.').trim();
+    let contact = contact.trim();
+    let has_scheme = |scheme: &str| {
+        contact
+            .get(..scheme.len())
+            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme))
+    };
+    let normalized_contact = if has_scheme("http://") || has_scheme("https://") {
+        contact.trim_end_matches('/')
+    } else {
+        contact
+    };
+    format!("{normalized_name} ({normalized_contact})")
+}
+
 fn has_explicit_copyright_marker(text: &str) -> bool {
     let lower = text.to_ascii_lowercase();
     lower.contains("(c)") || lower.contains('©') || lower.contains("copr")
diff --git a/src/scanner/process/copyright_test.rs b/src/scanner/process/copyright_test.rs
index 47e1a1434..72858dd26 100644
--- a/src/scanner/process/copyright_test.rs
+++ b/src/scanner/process/copyright_test.rs
@@ -79,3 +79,24 @@ LABEL maintainer=\"Progress Chef <docker@chef.io>\"\n";
         ]
     );
 }
+
+#[test]
+fn test_extract_comment_author_supplements_handles_c_style_translator_headers() {
+    let text = "/* Translated by Jorge Barreiro <yortx.barry@gmail.com>. */\n\
+/* Written by Mathias Bynens <https://mathiasbynens.be/> */\n\
+/* Written by Cloudream (cloudream@gmail.com). */\n\
+/* Written by S A Sureshkumar (saskumar@live.com). */\n";
+    // Only the author strings are compared; detection line numbers are not under test.
+    let authors = extract_comment_author_supplements(text);
+    let values: Vec<_> = authors.into_iter().map(|author| author.author).collect();
+    // Expected values drop the `*/` closer, trailing period, and the URL's `<...>` and slash.
+    assert_eq!(
+        values,
+        vec![
+            "Jorge Barreiro <yortx.barry@gmail.com>",
+            "Mathias Bynens https://mathiasbynens.be",
+            "Cloudream (cloudream@gmail.com)",
+            "S A Sureshkumar (saskumar@live.com)",
+        ]
+    );
+}

From 0c3088ebdd3e8a17a91e994513c75936d0e39f3e Mon Sep 17 00:00:00 2001
From: Maxim Stykow <maxim.stykow@gmail.com>
Date: Thu, 23 Apr 2026 00:39:03 +0200
Subject: [PATCH 2/3] docs(scorecard): mark Bower verification complete

Record the fully reviewed Bower compare-target set as verified while keeping the row notes stable and the verification narrative in benchmark artifacts instead.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
Signed-off-by: Maxim Stykow <maxim.stykow@gmail.com>
---
 .../package-detection/PARSER_VERIFICATION_SCORECARD.md        | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/docs/implementation-plans/package-detection/PARSER_VERIFICATION_SCORECARD.md b/docs/implementation-plans/package-detection/PARSER_VERIFICATION_SCORECARD.md
index 017b76070..d393114e1 100644
--- a/docs/implementation-plans/package-detection/PARSER_VERIFICATION_SCORECARD.md
+++ b/docs/implementation-plans/package-detection/PARSER_VERIFICATION_SCORECARD.md
@@ -57,8 +57,6 @@ Method rules:
 
 The ranking below is ordered by **practical verification value first**: broad ecosystem prevalence, likelihood of exposing real parser-plus-license/copyright interactions under `--profile common`, and coverage breadth within the implemented family.
 
-<<<<<<< HEAD
-
 | Priority | Ecosystem                                                                       | Status      | Candidate targets                                                                                                                                                                                                                                                         | Priority and scope notes                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
 | -------- | ------------------------------------------------------------------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
 | 0a       | Cross-cutting broad `C++` repository scans (non-parser reference)               | 🟢 Verified | `boostorg/boost` (236 files)<br>`boostorg/json` (701 files)<br>`mongodb/mongo` (11k files)                                                                                                                                                                                | There is no generic `C++` parser row. These repositories are still valuable reference targets because they exercise multiple implemented `C++`-adjacent families and package-adjacent detection in realistic trees. They complement, but do not replace, family-specific verification for Autotools, Conan, vcpkg, Bazel, and Buck.                                                                                                                                                                                                                                                                                           |
@@ -112,7 +110,7 @@ The ranking below is ordered by **practical verification value first**: broad ec
 | 36       | Buck                                                                            | 🟢 Verified | `facebook/buck2` (2k–10k files)<br>`facebook/watchman` (500–2k files)<br>`facebook/react-native` (10k–50k files)                                                                                                                                                          | Real Buck lane, even if narrower than Bazel in practice. `facebook/buck2` is the canonical direct reference, `facebook/watchman` is a smaller focused contrast, and `facebook/react-native` adds a large mixed-language consumer tree. Watch Buck metadata separately from the rest of the monorepo so unrelated JS/native/common-profile noise does not hide actual build-metadata gaps.                                                                                                                                                                                                                                     |
 | 37       | FreeBSD                                                                         | ⚪ Planned  | FreeBSD `pkg` package archive sample<br>FreeBSD `bash` package archive sample<br>FreeBSD `curl` package archive sample                                                                                                                                                    | Important artifact-family support, but narrower day-to-day scan prevalence than the higher-priority distro lanes.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             |
 | 38       | Chef                                                                            | 🟢 Verified | `sous-chefs/apache2` (<500 files)<br>`sous-chefs/mysql` (<500 files)<br>`chef/chef` (2k–10k files)                                                                                                                                                                        | Worth covering, but lower priority than the mainstream language and distro families.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          |
-| 39       | Bower                                                                           | ⚪ Planned  | `jquery/jquery-ui` (500–2k files)<br>`select2/select2` (<500 files)<br>`jashkenas/backbone` (<500 files)                                                                                                                                                                  | Legacy ecosystem with ongoing value mostly for backward compatibility.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
+| 39       | Bower                                                                           | 🟢 Verified | `jquery/jquery-ui` (500–2k files)<br>`select2/select2` (<500 files)<br>`jashkenas/backbone` (<500 files)                                                                                                                                                                  | Legacy ecosystem with ongoing value mostly for backward compatibility.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        |
 | 40       | Haxe                                                                            | ⚪ Planned  | `openfl/openfl` (500–2k files)<br>`HaxeFlixel/flixel` (500–2k files)<br>`HeapsIO/heaps` (500–2k files)                                                                                                                                                                    | Smaller ecosystem; still useful, but lower-value than the broader mainstream families above.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |
 | 41       | Windows Update                                                                  | ⚪ Planned  | `wsusscn2.cab` extracted tree<br>Windows cumulative update `.msu` extracted tree<br>Windows servicing stack update extracted tree                                                                                                                                         | Artifact-oriented family with real value, but specialized and best handled after the higher-signal source/package ecosystems.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 |
 | 42       | `misc.py` recognizers                                                           | ⚪ Planned  | Apache Tomcat binary release artifacts<br>Firefox add-on / language-pack artifacts<br>NSIS official installer artifacts                                                                                                                                                   | Broad recognizer family, but not a normal package-manager lane; treat as specialized follow-up verification.                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  |

From 18d2b452b5c47120b6197bbee96006543e80acdf Mon Sep 17 00:00:00 2001
From: Maxim Stykow <maxim.stykow@gmail.com>
Date: Thu, 23 Apr 2026 00:39:31 +0200
Subject: [PATCH 3/3] docs(benchmarks): record Bower verification runs

Add the reviewed jquery-ui, select2, and backbone compare snapshots to the benchmark table and regenerate the chart so the aggregate stats stay in sync with the new recorded runs.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
Signed-off-by: Maxim Stykow <maxim.stykow@gmail.com>
---
 docs/BENCHMARKS.md                         |  5 ++++-
 docs/benchmarks/scan-duration-vs-files.svg | 18 ++++++++++++++++++
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/docs/BENCHMARKS.md b/docs/BENCHMARKS.md
index 12f8d4de1..306104eca 100644
--- a/docs/BENCHMARKS.md
+++ b/docs/BENCHMARKS.md
@@ -11,7 +11,7 @@ The chart below uses a log-log scatter plot: file count on the x-axis, wall-cloc
 
 ![Scan duration vs. file count for Provenant and ScanCode](benchmarks/scan-duration-vs-files.svg)
 
-> Provenant is faster on 142 of 144 recorded runs, with a **11.6× median speedup** and **10.1× geometric-mean speedup** overall; the median gap grows from **6.4×** on sub-100-file targets to **20.1×** on 10k+ file targets.
+> Provenant is faster on 145 of 147 recorded runs, with a **11.6× median speedup** and **10.2× geometric-mean speedup** overall; the median gap grows from **6.4×** on sub-100-file targets to **20.1×** on 10k+ file targets.
 > Generated from the benchmark timing rows in this document via `cargo run --manifest-path xtask/Cargo.toml --bin generate-benchmark-chart`.
 
 ## Current benchmark examples
@@ -82,12 +82,15 @@ The tables below provide the per-target detail behind the chart. Each row is one
 | [denoland/std @ a864f62](https://github.com/denoland/std/tree/a864f62bcc8a5f20716d2becab3cfe224a2ad810)<br>2,812 files                  | 2026-04-22 · std-31214 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 10 proc        | Provenant: 16.30s<br>ScanCode: 394.76s<br>**24.22× faster (-95.9%)**   | Broader Deno package visibility (`45` vs `3` packages) from the root and leaf `*/deno.json` manifests across the standard-library tree, plus concrete Cargo lock package identities on embedded Rust fixtures instead of anonymous `cargo_lock` rows, with zero top-level license-expression deltas under the shared profile                                    |
 | [getsentry/self-hosted @ 8728919](https://github.com/getsentry/self-hosted/tree/8728919e080836c53724f277d4d36cc310fc5011)<br>129 files  | 2026-04-15 · self-hosted-22209 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 9 proc | Provenant: 12.14s<br>ScanCode: 78.89s<br>**6.50× faster (-84.6%)**     | Broader mixed Docker/npm/Python package extraction (`2` vs `1` packages, `111` vs `0` dependencies) from the integration-test `package-lock.json`, `uv.lock`, and committed service Dockerfiles, plus the more specific `Apache-2.0 AND FSL-1.1-ALv2` license classification on `LICENSE.md` where ScanCode reports only `FSL-1.1-ALv2`                         |
 | [iTowns/itowns @ 08e08f5](https://github.com/iTowns/itowns/tree/08e08f512983b6f3d60d04d431b67b3c5e2e1584)<br>616 files                  | 2026-04-19 · itowns-87752 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 10 proc     | Provenant: 12.53s<br>ScanCode: 170.19s<br>**13.58× faster (-92.6%)**   | Direct `publiccode.yml` package visibility on the root metadata file (`1` vs `0` on that file), with matched top-level package and dependency counts elsewhere plus Unicode-preserving Potree copyright normalization and cleaner URL shaping across README and docs material                                                                                   |
+| [jashkenas/backbone @ da75718](https://github.com/jashkenas/backbone/tree/da75718e896e52e84aa1f0411ba67fafcdcf6af3)<br>122 files        | 2026-04-22 · backbone-8407 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 4 proc     | Provenant: 11.27s<br>ScanCode: 104.56s<br>**9.28× faster (-89.2%)**    | Matched Bower package and dependency coverage on the repo-root `bower.json`, with datasource-tagged Bower package identity instead of a bare purl-only row, package-level party metadata from `package.json`, and much faster same-host runtime                                                                                                                 |
+| [jquery/jquery-ui @ eda7aa3](https://github.com/jquery/jquery-ui/tree/eda7aa34fa59d8f764b2164be3e3b7f14639b0db)<br>1,083 files          | 2026-04-22 · jquery-ui-93350 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 4 proc   | Provenant: 15.56s<br>ScanCode: 303.29s<br>**19.49× faster (-94.9%)**   | Matched Bower package and dependency coverage on the repo-root `bower.json`, with datasource-tagged Bower package identity instead of a bare purl-only row, cleaner Unicode-preserving author normalization across locale files and vendored docs, and much faster same-host runtime                                                                            |
 | [metabase/metabase @ 10997b1](https://github.com/metabase/metabase/tree/10997b10908414ab05773b085a56a37fcdebcd1a)<br>18,030 files       | 2026-04-13 · metabase-21346 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 9 proc    | Provenant: 51.84s<br>ScanCode: 1330.92s<br>**25.67× faster (-96.1%)**  | Broader package and dependency extraction (`8` vs `1` packages, `1436` vs `423` dependencies) from the root and driver `deps.edn` manifests plus committed `bun.lock` and `uv.lock`, with cleaner OFL font URL normalization where ScanCode preserves broken concatenated links                                                                                 |
 | [microsoft/vscode @ 0c1e100](https://github.com/microsoft/vscode/tree/0c1e100626c19724d1222c2bc4b63ba3556858a7)<br>14,398 files         | 2026-04-12 · vscode-89240 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 9 proc      | Provenant: 58.96s<br>ScanCode: 1410.57s<br>**23.92× faster (-95.8%)**  | Broader monorepo package and dependency extraction (`138` vs `1` packages, `7718` vs `1815` dependencies) from the root `package-lock.json`, many extension fixture manifests and lockfiles, and embedded Cargo/Docker metadata, plus richer named package identities where ScanCode emits generic lockfile and archive rows                                    |
 | [npm/cli @ 05dbba5](https://github.com/npm/cli/tree/05dbba5b8d727ddb2c098ce0553714eae791c5f2)<br>6,698 files                            | 2026-04-09 · cli-89026 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 4 proc         | Provenant: 295.10s<br>ScanCode: 3376.85s<br>**11.44× faster (-91.3%)** | Clean root npm workspace manifest coverage without ScanCode's workspace-assembly scan errors, fewer large registry-fixture JSON timeouts, and cleaner handling of duplicated private-workspace dependency exports and repeated MIT-style registry-fixture metadata noise                                                                                        |
 | [oakserver/oak @ 185baef](https://github.com/oakserver/oak/tree/185baef02551a84798000f25d3bd01c2fdfcb1ce)<br>103 files                  | 2026-04-22 · oak-39847 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 10 proc        | Provenant: 12.95s<br>ScanCode: 115.73s<br>**8.94× faster (-88.8%)**    | Direct Deno package visibility on the root `deno.json` (`1` vs `0` packages), plus Dockerfile package visibility on `.devcontainer/Dockerfile`, with cleaner trailing-slash URL normalization across README and docs material                                                                                                                                   |
 | [oven-sh/bun @ 700fc11](https://github.com/oven-sh/bun/tree/700fc117a2fd01ac0201deaa6fa69c5557acb04f)<br>12,551 files                   | 2026-04-09 · bun-18972 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 9 proc         | Provenant: 43.05s<br>ScanCode: 849.10s<br>**19.72× faster (-94.9%)**   | Far broader Bun/npm-family package extraction (`382` vs `29` packages, `5773` vs `323` dependencies) from the repo's 52 committed `bun.lock` / `bun.lockb` inputs that ScanCode leaves at zero, plus legacy `bun.lockb` coverage on `bench/bundle` and plainer `BSD-2-Clause` rebucketing where ScanCode uses the over-specific `BSD-2-Clause-Views` label      |
 | [renovatebot/renovate @ 91a7213](https://github.com/renovatebot/renovate/tree/91a72131e8aefcda8f0dab7499f378f7eb41300f)<br>3,663 files  | 2026-04-13 · renovate-30308 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 9 proc    | Provenant: 23.74s<br>ScanCode: 446.79s<br>**18.82× faster (-94.7%)**   | Broader fixture-heavy package and dependency extraction (`52` vs `1` packages, `1778` vs `1485` dependencies) from committed `project.clj`, `deps.edn`, and cross-ecosystem manager fixtures, plus Leiningen package identity on `lib/modules/manager/leiningen/__fixtures__/project.clj` where ScanCode stays manifest-blind                                   |
+| [select2/select2 @ 595494a](https://github.com/select2/select2/tree/595494a72fee67b0a61c64701cbb72e3121f97b9)<br>704 files              | 2026-04-22 · select2-925 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 4 proc       | Provenant: 12.57s<br>ScanCode: 146.24s<br>**11.63× faster (-91.4%)**   | Matched Bower package and dependency coverage on the repo-root `bower.json`, with datasource-tagged Bower package identity instead of a bare purl-only row, cleaner package-author normalization in `package.json`, and much faster same-host runtime                                                                                                           |
 | [vercel/next.js @ 8e5a36f](https://github.com/vercel/next.js/tree/8e5a36f6347528d8968da97262f372f908897bac)<br>28,044 files             | 2026-04-11 · next.js-35897 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 9 proc     | Provenant: 41.11s<br>ScanCode: 850.20s<br>**20.68× faster (-95.2%)**   | Broader monorepo package and dependency extraction (`464` vs `249` packages, `13787` vs `12017` dependencies) from the root `pnpm-lock.yaml`, many workspace fixture subtrees, and embedded Cargo/npm metadata, plus zero scan errors where ScanCode crashes on workspace `package.json` and `pnpm-lock.yaml` inputs                                            |
 | [yarnpkg/berry @ c0274d6](https://github.com/yarnpkg/berry/tree/c0274d6d7ba5939f447e78aaf16e456a00cf0bd1)<br>3,552 files                | 2026-04-12 · berry-43600 · macOS 26.3.1 · Apple M1 Max · 32 GB · arm64 · 9 proc       | Provenant: 23.75s<br>ScanCode: 194.82s<br>**8.20× faster (-87.8%)**    | Broader dependency extraction (`2835` vs `1301`) from Berry `yarn.lock`, workspace manifests, and `.pnp.cjs`, plus cleaner workspace package assembly that avoids ScanCode's duplicated npm package rows (`204` vs `395`) and `package.json` / `yarn.lock` assembly crashes while still surfacing extra Docker and Windows package inputs committed in the tree |
 
diff --git a/docs/benchmarks/scan-duration-vs-files.svg b/docs/benchmarks/scan-duration-vs-files.svg
index 4afc8354d..ae3e81b15 100644
--- a/docs/benchmarks/scan-duration-vs-files.svg
+++ b/docs/benchmarks/scan-duration-vs-files.svg
@@ -134,6 +134,9 @@ ScanCode: 92.08s</title></rect>
     <rect x="415.27" y="418.23" width="8" height="8" fill="#d97706" rx="1.5"><title>SwiftFiddle/swiftfiddle-web @ df09b80
 Files: 109
 ScanCode: 84.73s</title></rect>
+    <rect x="423.04" y="408.29" width="8" height="8" fill="#d97706" rx="1.5"><title>jashkenas/backbone @ da75718
+Files: 122
+ScanCode: 104.56s</title></rect>
     <rect x="426.88" y="421.60" width="8" height="8" fill="#d97706" rx="1.5"><title>getsentry/self-hosted @ 8728919
 Files: 129
 ScanCode: 78.89s</title></rect>
@@ -233,6 +236,9 @@ ScanCode: 203.47s</title></rect>
     <rect x="543.52" y="391.18" width="8" height="8" fill="#d97706" rx="1.5"><title>boostorg/json @ 70efd4b
 Files: 701
 ScanCode: 150.19s</title></rect>
+    <rect x="543.81" y="392.44" width="8" height="8" fill="#d97706" rx="1.5"><title>select2/select2 @ 595494a
+Files: 704
+ScanCode: 146.24s</title></rect>
     <rect x="555.41" y="432.81" width="8" height="8" fill="#d97706" rx="1.5"><title>tokio-rs/tokio @ 5db10f5
 Files: 833
 ScanCode: 62.23s</title></rect>
@@ -254,6 +260,9 @@ ScanCode: 84.36s</title></rect>
     <rect x="570.04" y="418.11" width="8" height="8" fill="#d97706" rx="1.5"><title>composer/composer @ a2bf8cb
 Files: 1030
 ScanCode: 84.94s</title></rect>
+    <rect x="573.49" y="357.97" width="8" height="8" fill="#d97706" rx="1.5"><title>jquery/jquery-ui @ eda7aa3
+Files: 1083
+ScanCode: 303.29s</title></rect>
     <rect x="574.44" y="398.92" width="8" height="8" fill="#d97706" rx="1.5"><title>pointfreeco/swift-composable-architecture @ 7517cc3
 Files: 1098
 ScanCode: 127.50s</title></rect>
@@ -568,6 +577,9 @@ Provenant: 10.77s</title></circle>
     <circle cx="419.27" cy="522.22" r="4.5" fill="#2563eb"><title>SwiftFiddle/swiftfiddle-web @ df09b80
 Files: 109
 Provenant: 10.21s</title></circle>
+    <circle cx="427.04" cy="517.55" r="4.5" fill="#2563eb"><title>jashkenas/backbone @ da75718
+Files: 122
+Provenant: 11.27s</title></circle>
     <circle cx="430.88" cy="514.04" r="4.5" fill="#2563eb"><title>getsentry/self-hosted @ 8728919
 Files: 129
 Provenant: 12.14s</title></circle>
@@ -667,6 +679,9 @@ Provenant: 14.37s</title></circle>
     <circle cx="547.52" cy="467.80" r="4.5" fill="#2563eb"><title>boostorg/json @ 70efd4b
 Files: 701
 Provenant: 32.30s</title></circle>
+    <circle cx="547.81" cy="512.39" r="4.5" fill="#2563eb"><title>select2/select2 @ 595494a
+Files: 704
+Provenant: 12.57s</title></circle>
     <circle cx="559.41" cy="493.35" r="4.5" fill="#2563eb"><title>tokio-rs/tokio @ 5db10f5
 Files: 833
 Provenant: 18.81s</title></circle>
@@ -688,6 +703,9 @@ Provenant: 20.09s</title></circle>
     <circle cx="574.04" cy="487.63" r="4.5" fill="#2563eb"><title>composer/composer @ a2bf8cb
 Files: 1030
 Provenant: 21.23s</title></circle>
+    <circle cx="577.49" cy="502.31" r="4.5" fill="#2563eb"><title>jquery/jquery-ui @ eda7aa3
+Files: 1083
+Provenant: 15.56s</title></circle>
     <circle cx="578.44" cy="521.35" r="4.5" fill="#2563eb"><title>pointfreeco/swift-composable-architecture @ 7517cc3
 Files: 1098
 Provenant: 10.40s</title></circle>