From da2ef6d84aa24ac2a1e64fdd7c8e79147833c7a9 Mon Sep 17 00:00:00 2001 From: Geller Bedoya Date: Mon, 15 Jun 2026 11:54:29 -0700 Subject: [PATCH] feat(ecosystem): Add Cargo (Rust) package scanning --- README.md | 1 + docs/inventory-sources.md | 46 +++- internal/ecosystem/cargo/cargo.go | 280 +++++++++++++++++++++++++ internal/ecosystem/cargo/cargo_test.go | 252 ++++++++++++++++++++++ internal/model/model.go | 3 + internal/osv/osv.go | 5 +- internal/osv/osv_test.go | 7 +- internal/scanner/scanner.go | 10 + 8 files changed, 598 insertions(+), 6 deletions(-) create mode 100644 internal/ecosystem/cargo/cargo.go create mode 100644 internal/ecosystem/cargo/cargo_test.go diff --git a/README.md b/README.md index 0776dc0..6a14258 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ know what they are looking for. | Bun | `npm` | `bun.lock`; `bun.lockb` presence as diagnostic | | PyPI | `pypi` | `*.dist-info/METADATA`, `INSTALLER`, `direct_url.json`, `*.egg-info/PKG-INFO` | | Go modules | `go` | `go.sum`, `go.mod` | +| Cargo (Rust) | `crates.io` | `~/.cargo/.crates2.json` (installed binaries), `Cargo.lock` | | RubyGems | `rubygems` | `Gemfile.lock`, installed `*.gemspec` | | Composer | `packagist` | `composer.lock`, `vendor/composer/installed.json` | | MCP | `mcp` | JSON host configs: `mcp.json`, `.mcp.json`, `claude_desktop_config.json`, `mcp_config.json`, `mcp_settings.json`, `cline_mcp_settings.json`, plus `~/.gemini/settings.json` (Gemini CLI / Code Assist) and `~/.claude.json` (Claude Code user- and project-scoped `mcpServers`). Non-JSON configs (Codex `config.toml`, Continue YAML) are not parsed in v0.1. 
| diff --git a/docs/inventory-sources.md b/docs/inventory-sources.md index 1e64555..c8e786c 100644 --- a/docs/inventory-sources.md +++ b/docs/inventory-sources.md @@ -231,6 +231,51 @@ References: - `go.sum` and `go.mod` reference: - Module cache layout: +## Cargo (Rust) + +Files read: + +- `~/.cargo/.crates2.json` — Cargo's JSON record of every binary installed + via `cargo install`. Each `installs` entry's key is a + `"<name> <version> (<source>)"` triple that produces a high-confidence + record marked `direct_dependency=true` (every entry was an explicit + `cargo install` invocation). Dispatch is path-aware: the file is parsed + only when its parent directory is named `.cargo`. +- `Cargo.lock` — Cargo's TOML lockfile. Each `[[package]]` block with a + non-empty `source` produces a high-confidence record; this covers + registry crates as well as `git+…` and other sourced dependencies. + Blocks without a `source` are workspace-local crates (the root package + and any path dependencies) and are skipped: they have no registry + coordinate and cannot match a published-package exposure catalog entry. + +Captured fields emitted on the record: `package_name`, `version`, +`package_manager=cargo`, `source_type` (`cargo-crates2-installs` or +`cargo-lock`), `confidence=high`, and `direct_dependency` on +`.crates2.json` records. + +The user-package baseline already walks `~/.cargo`, so the installs file +is picked up without additional configuration. `Cargo.toml` (the +manifest) is intentionally not parsed: its version requirements are +ranges rather than exact pins and would produce ambiguous records. + +We do not run `cargo install --list`, `cargo metadata`, or any other +Cargo subcommand. + +Because the user-package baseline walks `~/.cargo` and dispatch on +`Cargo.lock` is by basename, any `Cargo.lock` under +`~/.cargo/registry/src/<registry>/<crate>-<version>/` (the lockfile the +crate's author shipped at publish time) is parsed and emitted with +`source_file` inside that directory.
These records reflect "what some +upstream crate author pinned," not "what the user selected," so they +overstate exposure on developer hosts and should be filtered downstream +by `source_file` prefix if that is undesirable. The `.crate` tarballs in +`~/.cargo/registry/cache/` are not unpacked and contribute no records. + +References: + +- Cargo `.crates2.json` install record: +- Cargo lockfile reference: + ## RubyGems / Bundler Files read: @@ -650,7 +695,6 @@ strong installed-state correlation tooling today. ## Not currently covered -- Cargo (`Cargo.lock`). - Maven / Gradle (`pom.xml`, lockfiles). - NuGet (`packages.lock.json`). - Hex (`mix.lock`). diff --git a/internal/ecosystem/cargo/cargo.go b/internal/ecosystem/cargo/cargo.go new file mode 100644 index 0000000..de958df --- /dev/null +++ b/internal/ecosystem/cargo/cargo.go @@ -0,0 +1,280 @@ +// Package cargo scans Rust/Cargo package artifacts. +// +// Two on-disk surfaces are read: +// +// - `~/.cargo/.crates2.json` — the canonical record of every binary +// installed via `cargo install`. Highest-confidence baseline source: +// each entry names the crate, version, and source registry. +// - `Cargo.lock` — TOML lockfile listing the resolved dependency tree. +// Higher-confidence than `Cargo.toml` because versions are pinned. +// +// No `cargo` commands are executed. Detection is path-/filename-based. +// `Cargo.toml` (the manifest) is intentionally not parsed: version +// requirements there are ranges rather than exact pins and would +// produce ambiguous records. 
+package cargo + +import ( + "bufio" + "bytes" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/perplexityai/bumblebee/internal/model" +) + +const Ecosystem = model.EcosystemCargo + +const ( + packageManager = "cargo" + crates2SourceType = "cargo-crates2-installs" + cargoLockSourceType = "cargo-lock" + crates2FileName = ".crates2.json" + cargoLockFileName = "Cargo.lock" + cargoDirName = ".cargo" +) + +type Scanner struct { + MaxFileSize int64 + Emit func(model.Record) + Diag func(level, path, msg string) +} + +// IsCargoLock reports whether base is a Cargo lockfile. +func IsCargoLock(base string) bool { return base == cargoLockFileName } + +// IsCrates2JSON reports whether path is `<dir>/.cargo/.crates2.json`, +// i.e. the basename is `.crates2.json` and the parent directory is named +// `.cargo`. Dispatch is path-aware rather than basename-only because the +// file is only meaningful inside a Cargo home directory. +func IsCrates2JSON(path string) bool { + return filepath.Base(path) == crates2FileName && + filepath.Base(filepath.Dir(path)) == cargoDirName +} + +// crates2File is the on-disk shape of `~/.cargo/.crates2.json`. Cargo +// writes a single `installs` object whose keys are +// `"<name> <version> (<source>)"` triples. The value carries install +// metadata; only `bins` is decoded, and ScanCrates2JSON reads nothing but +// the keys when building records.
+type crates2File struct { + Installs map[string]crates2Install `json:"installs"` +} + +type crates2Install struct { + Bins []string `json:"bins"` +} + +func (s *Scanner) ScanCrates2JSON(path string, base model.Record) error { + data, err := s.readBounded(path) + if err != nil { + return err + } + var doc crates2File + if err := json.Unmarshal(data, &doc); err != nil { + if s.Diag != nil { + s.Diag("warn", path, "skipping malformed .crates2.json: "+err.Error()) + } + return nil + } + projectPath := filepath.Dir(path) + for key := range doc.Installs { + name, version, ok := parseCrates2InstallKey(key) + if !ok { + continue + } + r := base + r.Ecosystem = Ecosystem + r.PackageName = name + r.NormalizedName = strings.ToLower(name) + r.Version = version + r.ProjectPath = projectPath + r.PackageManager = packageManager + r.SourceType = crates2SourceType + r.SourceFile = path + // `.crates2.json` only records crates the user explicitly ran + // `cargo install` on, so every entry is a direct dependency. + direct := true + r.DirectDependency = &direct + r.Confidence = "high" + s.Emit(r) + } + return nil +} + +// parseCrates2InstallKey splits a `.crates2.json` install key into its +// crate name and version. The key shape is +// `" ()"` — a crate-name token, a SemVer token, +// then a parenthesized source descriptor. Crate names never contain +// spaces or parentheses, so a left-to-right split on the first two +// spaces is unambiguous. +func parseCrates2InstallKey(key string) (name, version string, ok bool) { + key = strings.TrimSpace(key) + if key == "" { + return "", "", false + } + sp1 := strings.IndexByte(key, ' ') + if sp1 <= 0 { + return "", "", false + } + name = key[:sp1] + rest := key[sp1+1:] + sp2 := strings.IndexByte(rest, ' ') + if sp2 <= 0 { + // No source segment; tolerate `" "` shape. 
+ version = strings.TrimSpace(rest) + return name, version, version != "" + } + version = rest[:sp2] + return name, version, name != "" && version != "" +} + +// ScanCargoLock emits a Record for every third-party crate recorded +// in a Cargo.lock file. The lockfile is the authoritative list of +// resolved package versions for a Rust project, including transitive +// dependencies pulled in from a registry. +func (s *Scanner) ScanCargoLock(path string, base model.Record) error { + data, err := s.readBounded(path) + if err != nil { + return err + } + projectPath := filepath.Dir(path) + pkgs := parseCargoLockPackages(data) + seen := make(map[string]struct{}, len(pkgs)) + for _, p := range pkgs { + if p.name == "" || p.version == "" { + continue + } + // Skip workspace-local crates (root package and path-dependency + // siblings): they are the user's own code, not registry-sourced + // third-party artifacts. Catalog matching is name+version only and + // doesn't consult the source, so a local crate sharing a name with + // a published malicious one would otherwise produce a false positive. + if p.source == "" { + continue + } + key := p.name + "\x00" + p.version + if _, dup := seen[key]; dup { + continue + } + seen[key] = struct{}{} + r := base + r.Ecosystem = Ecosystem + r.PackageName = p.name + r.NormalizedName = strings.ToLower(p.name) + r.Version = p.version + r.ProjectPath = projectPath + r.PackageManager = packageManager + r.SourceType = cargoLockSourceType + r.SourceFile = path + r.Confidence = "high" + s.Emit(r) + } + return nil +} + +type cargoLockPackage struct { + name string + version string + source string +} + +// parseCargoLockPackages scans a Cargo.lock TOML body for `[[package]]` +// blocks and pulls name/version/source from each. 
The parser is +// deliberately minimal: Cargo.lock is machine-generated with a stable +// shape (one quoted-string value per line, no inline tables for the +// fields we care about), so a line-oriented scan is sufficient and +// keeps the scanner dependency-free. +func parseCargoLockPackages(data []byte) []cargoLockPackage { + var out []cargoLockPackage + sc := bufio.NewScanner(bytes.NewReader(data)) + sc.Buffer(make([]byte, 0, 64*1024), 4*1024*1024) + inPackage := false + var current cargoLockPackage + flush := func() { + if inPackage { + out = append(out, current) + } + current = cargoLockPackage{} + inPackage = false + } + for sc.Scan() { + raw := sc.Text() + line := strings.TrimSpace(raw) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + if strings.HasPrefix(line, "[") { + flush() + if line == "[[package]]" { + inPackage = true + } + continue + } + if !inPackage { + continue + } + key, value, ok := parseCargoLockField(line) + if !ok { + continue + } + switch key { + case "name": + current.name = value + case "version": + current.version = value + case "source": + current.source = value + } + } + flush() + return out +} + +// parseCargoLockField extracts the key and quoted-string value from a +// line shaped like `key = "value"`. 
+func parseCargoLockField(line string) (key, value string, ok bool) { + eq := strings.IndexByte(line, '=') + if eq <= 0 { + return "", "", false + } + key = strings.TrimSpace(line[:eq]) + rest := strings.TrimSpace(line[eq+1:]) + if len(rest) < 2 || rest[0] != '"' { + return "", "", false + } + rest = rest[1:] + end := strings.IndexByte(rest, '"') + if end < 0 { + return "", "", false + } + return key, rest[:end], true +} + +// readBounded opens path and returns its contents, refusing anything +// that is not a regular file or that exceeds MaxFileSize +func (s *Scanner) readBounded(path string) ([]byte, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + info, err := f.Stat() + if err != nil { + return nil, err + } + if !info.Mode().IsRegular() { + return nil, errors.New("not a regular file") + } + if s.MaxFileSize > 0 && info.Size() > s.MaxFileSize { + if s.Diag != nil { + s.Diag("warn", path, fmt.Sprintf("skipping: size %d exceeds max %d", info.Size(), s.MaxFileSize)) + } + return nil, fmt.Errorf("file %s exceeds max size %d", path, s.MaxFileSize) + } + return io.ReadAll(f) +} diff --git a/internal/ecosystem/cargo/cargo_test.go b/internal/ecosystem/cargo/cargo_test.go new file mode 100644 index 0000000..35597cb --- /dev/null +++ b/internal/ecosystem/cargo/cargo_test.go @@ -0,0 +1,252 @@ +package cargo + +import ( + "os" + "path/filepath" + "sort" + "strings" + "testing" + + "github.com/perplexityai/bumblebee/internal/model" +) + +func TestIsCrates2JSON(t *testing.T) { + cases := []struct { + path string + want bool + }{ + {filepath.Join("home", ".cargo", ".crates2.json"), true}, + {filepath.Join("home", ".cargo", ".crates.json"), false}, + {filepath.Join("home", "elsewhere", ".crates2.json"), false}, + {filepath.Join("home", ".cargo", "registry", ".crates2.json"), false}, + } + for _, c := range cases { + got := IsCrates2JSON(c.path) + if got != c.want { + t.Errorf("IsCrates2JSON(%q) = %v, want %v", c.path, got, c.want) + 
} + } +} + +func TestIsCargoLock(t *testing.T) { + if !IsCargoLock("Cargo.lock") { + t.Errorf("IsCargoLock(Cargo.lock) = false") + } + if IsCargoLock("cargo.lock") { + t.Errorf("IsCargoLock should be case-sensitive") + } + if IsCargoLock("Cargo.toml") { + t.Errorf("IsCargoLock matched Cargo.toml") + } +} + +func TestParseCrates2InstallKey(t *testing.T) { + cases := []struct { + key string + wantName string + wantVersion string + wantOK bool + }{ + { + "cargo-auditable 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)", + "cargo-auditable", "0.7.4", true, + }, + { + "ripgrep 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "ripgrep", "13.0.0", true, + }, + { + "depsguard 0.1.33", + "depsguard", "0.1.33", true, + }, + {"", "", "", false}, + {"single-token", "", "", false}, + } + for _, c := range cases { + name, version, ok := parseCrates2InstallKey(c.key) + if ok != c.wantOK || name != c.wantName || version != c.wantVersion { + t.Errorf("parseCrates2InstallKey(%q) = (%q, %q, %v), want (%q, %q, %v)", + c.key, name, version, ok, c.wantName, c.wantVersion, c.wantOK) + } + } +} + +func TestScanCrates2JSON(t *testing.T) { + dir := t.TempDir() + cargoDir := filepath.Join(dir, ".cargo") + if err := os.MkdirAll(cargoDir, 0o755); err != nil { + t.Fatal(err) + } + path := filepath.Join(cargoDir, ".crates2.json") + body := `{ + "installs": { + "cargo-auditable 0.7.4 (registry+https://github.com/rust-lang/crates.io-index)": { + "bins": ["cargo-auditable"] + }, + "ripgrep 13.0.0 (registry+https://github.com/rust-lang/crates.io-index)": { + "bins": ["rg"] + } + } + }` + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + if !IsCrates2JSON(path) { + t.Fatalf("IsCrates2JSON(%q) = false", path) + } + + var out []model.Record + s := &Scanner{ + MaxFileSize: 1 << 20, + Emit: func(r model.Record) { out = append(out, r) }, + Diag: func(string, string, string) {}, + } + if err := s.ScanCrates2JSON(path, model.Record{}); err 
!= nil { + t.Fatalf("ScanCrates2JSON: %v", err) + } + if len(out) != 2 { + t.Fatalf("records = %d, want 2", len(out)) + } + sort.Slice(out, func(i, j int) bool { return out[i].PackageName < out[j].PackageName }) + if out[0].PackageName != "cargo-auditable" || out[0].Version != "0.7.4" { + t.Errorf("cargo-auditable record: %+v", out[0]) + } + if out[1].PackageName != "ripgrep" || out[1].Version != "13.0.0" { + t.Errorf("ripgrep record: %+v", out[1]) + } + for _, r := range out { + if r.Ecosystem != model.EcosystemCargo { + t.Errorf("ecosystem = %q, want %q", r.Ecosystem, model.EcosystemCargo) + } + if r.SourceType != "cargo-crates2-installs" { + t.Errorf("source_type = %q", r.SourceType) + } + if r.PackageManager != "cargo" { + t.Errorf("package_manager = %q", r.PackageManager) + } + if r.Confidence != "high" { + t.Errorf("confidence = %q", r.Confidence) + } + if r.DirectDependency == nil || !*r.DirectDependency { + t.Errorf("direct_dependency = %v, want true", r.DirectDependency) + } + } +} + +func TestScanCrates2JSONMalformed(t *testing.T) { + dir := t.TempDir() + cargoDir := filepath.Join(dir, ".cargo") + if err := os.MkdirAll(cargoDir, 0o755); err != nil { + t.Fatal(err) + } + path := filepath.Join(cargoDir, ".crates2.json") + if err := os.WriteFile(path, []byte("not json"), 0o644); err != nil { + t.Fatal(err) + } + var diagnostics []string + s := &Scanner{ + MaxFileSize: 1 << 20, + Emit: func(model.Record) { t.Fatal("Emit should not be called on malformed input") }, + Diag: func(_, _, msg string) { diagnostics = append(diagnostics, msg) }, + } + if err := s.ScanCrates2JSON(path, model.Record{}); err != nil { + t.Fatalf("ScanCrates2JSON returned error for malformed input: %v", err) + } + if len(diagnostics) == 0 || !strings.Contains(diagnostics[0], "malformed") { + t.Fatalf("expected malformed diagnostic, got %v", diagnostics) + } +} + +func TestScanCargoLock(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "Cargo.lock") + body := `# 
Auto-generated +version = 4 + +[[package]] +name = "addr2line" +version = "0.25.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b5d307320b3181d6d7954e663bd7c774a838b8220fe0593c86d9fb09f498b4b" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "myworkspace-local" +version = "0.0.0" +dependencies = [ + "core", +] +` + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + var out []model.Record + s := &Scanner{ + MaxFileSize: 1 << 20, + Emit: func(r model.Record) { out = append(out, r) }, + } + if err := s.ScanCargoLock(path, model.Record{}); err != nil { + t.Fatalf("ScanCargoLock: %v", err) + } + if len(out) != 2 { + t.Fatalf("records = %d, want 2 (workspace-local must be dropped): %+v", len(out), out) + } + sort.Slice(out, func(i, j int) bool { return out[i].PackageName < out[j].PackageName }) + if out[0].PackageName != "addr2line" || out[0].Version != "0.25.1" { + t.Errorf("addr2line record: %+v", out[0]) + } + if out[1].PackageName != "adler2" || out[1].Version != "2.0.1" { + t.Errorf("adler2 record: %+v", out[1]) + } + for _, r := range out { + if r.Ecosystem != model.EcosystemCargo { + t.Errorf("ecosystem = %q", r.Ecosystem) + } + if r.SourceType != "cargo-lock" { + t.Errorf("source_type = %q", r.SourceType) + } + if r.ProjectPath != dir { + t.Errorf("project_path = %q, want %q", r.ProjectPath, dir) + } + if r.Confidence != "high" { + t.Errorf("confidence = %q", r.Confidence) + } + } +} + +func TestScanCargoLockDedupesDuplicateEntries(t *testing.T) { + dir := t.TempDir() + path := filepath.Join(dir, "Cargo.lock") + body := `[[package]] +name = "same" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "same" +version = "1.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +` + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatal(err) + } + var out []model.Record + s := &Scanner{ + MaxFileSize: 1 << 20, + Emit: func(r model.Record) { out = append(out, r) }, + } + if err := s.ScanCargoLock(path, model.Record{}); err != nil { + t.Fatalf("ScanCargoLock: %v", err) + } + if len(out) != 1 { + t.Fatalf("records = %d, want 1 after dedup", len(out)) + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 0ba4639..6aef2a1 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -45,6 +45,7 @@ const ( EcosystemBrowserExtension = "browser-extension" EcosystemHomebrew = "homebrew" EcosystemAgentSkill = "agent-skill" + EcosystemCargo = "crates.io" ) var supportedEcosystems = map[string]struct{}{ @@ -58,6 +59,7 @@ var supportedEcosystems = map[string]struct{}{ EcosystemBrowserExtension: {}, EcosystemHomebrew: {}, EcosystemAgentSkill: {}, + EcosystemCargo: {}, } var supportedEcosystemOrder = []string{ @@ -71,6 +73,7 @@ var supportedEcosystemOrder = []string{ EcosystemBrowserExtension, EcosystemHomebrew, EcosystemAgentSkill, + EcosystemCargo, } // SupportedEcosystems returns the emitted ecosystem values supported by v0.1. diff --git a/internal/osv/osv.go b/internal/osv/osv.go index 2bd01b5..6e3d237 100644 --- a/internal/osv/osv.go +++ b/internal/osv/osv.go @@ -168,8 +168,8 @@ type Stats struct { // (https://osv-vulnerabilities.storage.googleapis.com/ecosystems.txt) to // the lowercased values Bumblebee emits on records, so a generated entry // matches the scanner's output. Only the registries Bumblebee inventories -// by package version are mapped; others (crates.io, NuGet, Maven, VSCode, -// Linux distros, ...) have no equivalent and their records are skipped. +// by package version are mapped; others (NuGet, Maven, Linux distros, ...) +// have no equivalent and their records are skipped. 
var ecosystemMap = map[string]string{ "npm": "npm", "PyPI": "pypi", @@ -177,6 +177,7 @@ var ecosystemMap = map[string]string{ "RubyGems": "rubygems", "Packagist": "packagist", "VSCode": "editor-extension", + "crates.io": "crates.io", } // mapEcosystem returns the Bumblebee ecosystem for an OSV ecosystem diff --git a/internal/osv/osv_test.go b/internal/osv/osv_test.go index 28f3baa..0d09c70 100644 --- a/internal/osv/osv_test.go +++ b/internal/osv/osv_test.go @@ -52,6 +52,7 @@ func TestMapEcosystem(t *testing.T) { "RubyGems": "rubygems", "Packagist": "packagist", "VSCode": "editor-extension", + "crates.io": "crates.io", "Go:something": "go", // suffix after ':' is ignored } for osvEco, want := range supported { @@ -62,7 +63,7 @@ func TestMapEcosystem(t *testing.T) { } // OSV identifiers are case-sensitive and several ecosystems have no // Bumblebee equivalent; none of these must map. - for _, osvEco := range []string{"pypi", "NPM", "crates.io", "NuGet", "Maven", "vscode", "Debian:11", ""} { + for _, osvEco := range []string{"pypi", "NPM", "NuGet", "Maven", "vscode", "Debian:11", ""} { if got, ok := mapEcosystem(osvEco); ok { t.Errorf("mapEcosystem(%q) = (%q, true), want no mapping", osvEco, got) } @@ -88,7 +89,7 @@ func TestConvertDropsNonMaliciousVuln(t *testing.T) { func TestConvertSkipsWithdrawnUnsupportedAndRangeOnly(t *testing.T) { records := []Record{ {ID: "MAL-withdrawn", Withdrawn: "2026-01-01T00:00:00Z", Affected: []Affected{{Package: Package{Ecosystem: "npm", Name: "x"}, Versions: []string{"1.0.0"}}}}, - {ID: "MAL-cargo", Affected: []Affected{{Package: Package{Ecosystem: "crates.io", Name: "y"}, Versions: []string{"1.0.0"}}}}, + {ID: "MAL-nuget", Affected: []Affected{{Package: Package{Ecosystem: "NuGet", Name: "y"}, Versions: []string{"1.0.0"}}}}, {ID: "MAL-rangeonly", Affected: []Affected{{Package: Package{Ecosystem: "npm", Name: "z"}}}}, // Empty package name must be dropped: an entry with an empty // package would make exposure.Load reject the whole 
catalog. @@ -206,7 +207,7 @@ func TestBuildCatalogCommentDeterministic(t *testing.T) { // Non-malicious + unsupported eco + withdrawn + bad id so all // skip counters are exercised in the comment. {ID: "GHSA-vuln", Affected: []Affected{{Package: Package{Ecosystem: "npm", Name: "b"}, Versions: []string{"1.0.0"}}}}, - {ID: "MAL-crates", Affected: []Affected{{Package: Package{Ecosystem: "crates.io", Name: "c"}, Versions: []string{"1.0.0"}}}}, + {ID: "MAL-nuget", Affected: []Affected{{Package: Package{Ecosystem: "NuGet", Name: "c"}, Versions: []string{"1.0.0"}}}}, {ID: "MAL-withdrawn", Withdrawn: "2026-01-01T00:00:00Z", Affected: []Affected{{Package: Package{Ecosystem: "npm", Name: "d"}, Versions: []string{"1.0.0"}}}}, {ID: "", Affected: []Affected{{Package: Package{Ecosystem: "npm", Name: "e"}, Versions: []string{"1.0.0"}}}}, // bad-id (empty) } diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index d0de709..dd5e9ea 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -20,6 +20,7 @@ import ( "github.com/perplexityai/bumblebee/internal/ecosystem/browserext" "github.com/perplexityai/bumblebee/internal/ecosystem/bun" + "github.com/perplexityai/bumblebee/internal/ecosystem/cargo" "github.com/perplexityai/bumblebee/internal/ecosystem/composer" "github.com/perplexityai/bumblebee/internal/ecosystem/editorext" "github.com/perplexityai/bumblebee/internal/ecosystem/gomod" @@ -253,6 +254,7 @@ func Run(ctx context.Context, cfg Config) (Result, error) { extS := &editorext.Scanner{MaxFileSize: cfg.MaxFileSize, Emit: emit, Diag: diag} bxS := &browserext.Scanner{MaxFileSize: cfg.MaxFileSize, Emit: emit, Diag: diag} hbS := &homebrew.Scanner{MaxFileSize: cfg.MaxFileSize, Emit: emit, Diag: diag} + cgS := &cargo.Scanner{MaxFileSize: cfg.MaxFileSize, Emit: emit, Diag: diag} type job struct { kind string @@ -326,6 +328,10 @@ func Run(ctx context.Context, cfg Config) (Result, error) { err = hbS.ScanFormulaReceipt(j.path, j.extra1, j.extra2, 
j.projectPath, cfg.BaseRecord) case "homebrew-cask": err = hbS.ScanCaskMetadata(j.path, j.extra1, j.extra2, j.projectPath, cfg.BaseRecord) + case "cargo-lock": + err = cgS.ScanCargoLock(j.path, cfg.BaseRecord) + case "cargo-crates2": + err = cgS.ScanCrates2JSON(j.path, cfg.BaseRecord) } if err != nil { cfg.Emitter.Diag("error", j.path, err.Error()) @@ -418,6 +424,10 @@ func Run(ctx context.Context, cfg Config) (Result, error) { send(job{kind: "go-sum", path: path}) case enabled(model.EcosystemGo) && gomod.IsGoMod(base): send(job{kind: "go-mod", path: path}) + case enabled(model.EcosystemCargo) && cargo.IsCargoLock(base): + send(job{kind: "cargo-lock", path: path}) + case enabled(model.EcosystemCargo) && base == ".crates2.json" && cargo.IsCrates2JSON(path): + send(job{kind: "cargo-crates2", path: path}) case enabled(model.EcosystemRubyGems) && rubygems.IsGemfileLock(base): send(job{kind: "rb-lock", path: path}) case enabled(model.EcosystemRubyGems) && rubygems.IsGemspec(base):