diff --git a/internal/cli/build.go b/internal/cli/build.go index 946623846..86b17661f 100644 --- a/internal/cli/build.go +++ b/internal/cli/build.go @@ -60,6 +60,8 @@ func buildCmd() *cobra.Command { var includePaths []string var ignoreSignatures bool var sizeLimits options.SizeLimits + var extraPythonPackages []string + var extraPythonIndexes []string cmd := &cobra.Command{ Use: "build", @@ -119,6 +121,8 @@ Along the image, apko will generate SBOMs (software bill of materials) describin build.WithIncludePaths(includePaths), build.WithIgnoreSignatures(ignoreSignatures), build.WithSizeLimits(sizeLimits), + build.WithExtraEcosystemPackages("python", extraPythonPackages), + build.WithExtraEcosystemIndexes("python", extraPythonIndexes), ) }, } @@ -139,6 +143,8 @@ Along the image, apko will generate SBOMs (software bill of materials) describin cmd.Flags().StringVar(&lockfile, "lockfile", "", "a path to .lock.json file (e.g. produced by apko lock) that constraints versions of packages to the listed ones (default '' means no additional constraints)") cmd.Flags().StringSliceVar(&includePaths, "include-paths", []string{}, "Additional include paths where to look for input files (config, base image, etc.). By default apko will search for paths only in workdir. Include paths may be absolute, or relative. Relative paths are interpreted relative to workdir. 
For adding extra paths for packages, use --repository-append.") cmd.Flags().BoolVar(&ignoreSignatures, "ignore-signatures", false, "ignore repository signature verification") + cmd.Flags().StringSliceVar(&extraPythonPackages, "ecosystem-python-package-append", []string{}, "extra Python packages to include (e.g., flask==3.0.0)") + cmd.Flags().StringSliceVar(&extraPythonIndexes, "ecosystem-python-index-append", []string{}, "extra Python package index URLs to use") addClientLimitFlags(cmd, &sizeLimits) return cmd } diff --git a/internal/cli/lock.go b/internal/cli/lock.go index 67c10d6ff..e09a7712d 100644 --- a/internal/cli/lock.go +++ b/internal/cli/lock.go @@ -35,6 +35,8 @@ import ( apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/build" "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" + _ "chainguard.dev/apko/pkg/ecosystem/python" pkglock "chainguard.dev/apko/pkg/lock" ) @@ -245,6 +247,30 @@ func LockCmd(ctx context.Context, output string, archs []types.Architecture, opt } } + // Resolve ecosystem packages + for name, ecoConfig := range ic.Contents.Ecosystems { + installer, ok := ecosystem.Get(name) + if !ok { + return fmt.Errorf("unknown ecosystem: %s", name) + } + for _, arch := range archs { + resolved, err := installer.Resolve(ctx, ecoConfig, arch, "glibc", auth.DefaultAuthenticators) + if err != nil { + return fmt.Errorf("resolving %s packages for %s: %w", name, arch, err) + } + for _, pkg := range resolved { + lock.Contents.EcosystemPackages = append(lock.Contents.EcosystemPackages, pkglock.LockEcosystemPkg{ + Ecosystem: pkg.Ecosystem, + Name: pkg.Name, + Version: pkg.Version, + URL: pkg.URL, + Checksum: pkg.Checksum, + Architecture: arch.ToAPK(), + }) + } + } + } + // Sort keyrings by name for reproducible lock files sort.Slice(lock.Contents.Keyrings, func(i, j int) bool { return lock.Contents.Keyrings[i].Name < lock.Contents.Keyrings[j].Name diff --git a/pkg/build/build.go b/pkg/build/build.go index 
007004540..ad385a80f 100644 --- a/pkg/build/build.go +++ b/pkg/build/build.go @@ -41,6 +41,7 @@ import ( apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/baseimg" "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" "chainguard.dev/apko/pkg/options" "chainguard.dev/apko/pkg/paths" "chainguard.dev/apko/pkg/s6" @@ -64,6 +65,11 @@ type Context struct { fs apkfs.FullFS apk *apk.APK baseimg *baseimg.BaseImage + + // ecosystemPkgs holds resolved ecosystem packages with InstalledSize + // populated after installation. Used by buildLayers to create + // separate layers for ecosystem packages. + ecosystemPkgs []ecosystem.ResolvedPackage } func (bc *Context) Summarize(ctx context.Context) { diff --git a/pkg/build/build_implementation.go b/pkg/build/build_implementation.go index 2200af5f6..fb67df191 100644 --- a/pkg/build/build_implementation.go +++ b/pkg/build/build_implementation.go @@ -23,6 +23,7 @@ import ( "encoding/json" "fmt" "io" + "maps" "os" "path/filepath" "runtime" @@ -36,6 +37,8 @@ import ( ldsocache "chainguard.dev/apko/internal/ldso-cache" "chainguard.dev/apko/pkg/apk/apk" apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/ecosystem" + _ "chainguard.dev/apko/pkg/ecosystem/python" // Register python ecosystem installer. "chainguard.dev/apko/pkg/lock" "chainguard.dev/apko/pkg/options" ) @@ -177,6 +180,22 @@ func (bc *Context) buildImage(ctx context.Context) ([]apk.InstalledDiff, error) } } + // Install ecosystem packages (python, etc.) after APK packages so that + // the language runtime is available for version detection. 
+ if len(bc.ic.Contents.Ecosystems) > 0 { + env, ecoPkgs, err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch, bc.o.Auth) + if err != nil { + return nil, fmt.Errorf("installing ecosystem packages: %w", err) + } + bc.ecosystemPkgs = ecoPkgs + if len(env) > 0 { + if bc.ic.Environment == nil { + bc.ic.Environment = make(map[string]string) + } + maps.Copy(bc.ic.Environment, env) + } + } + // For now adding additional accounts is banned when using base image. On the other hand, we don't want to // wipe out the users set in base. // If one wants to add a support for adding additional users they would need to look into this piece of code. diff --git a/pkg/build/layers.go b/pkg/build/layers.go index f0716616a..11c0fca4d 100644 --- a/pkg/build/layers.go +++ b/pkg/build/layers.go @@ -25,6 +25,7 @@ import ( "os" "path" "slices" + "strings" "chainguard.dev/apko/pkg/apk/apk" apkfs "chainguard.dev/apko/pkg/apk/fs" @@ -50,10 +51,10 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) { return nil, fmt.Errorf("building filesystem: %w", err) } - pkgs := make([]*apk.Package, 0, len(diffs)) + apkPkgs := make([]*apk.Package, 0, len(diffs)) pkgToDiff := map[*apk.Package][]byte{} for _, pkgDiff := range diffs { - pkgs = append(pkgs, pkgDiff.Package) + apkPkgs = append(apkPkgs, pkgDiff.Package) pkgToDiff[pkgDiff.Package] = pkgDiff.Diff } @@ -69,11 +70,28 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) { return nil, err } - // Use our layering strategy to partition packages into a set of Budget groups. - groups, err := groupByOriginAndSize(pkgs, bc.ic.Layering.Budget) + // Group APK packages by origin/replaces. + apkGroups, err := groupAPKByOrigin(apkPkgs) if err != nil { - return nil, fmt.Errorf("grouping packages: %w", err) + return nil, fmt.Errorf("grouping apk packages: %w", err) } + + // Create a separate group for each ecosystem package. 
+ // Each gets its own group since ecosystem packages are independently versioned + // and don't have APK concepts like origin or replaces. + ecoGroups := make([]*group, 0, len(bc.ecosystemPkgs)) + for _, ep := range bc.ecosystemPkgs { + owner := ep.OwnerName() + ecoGroups = append(ecoGroups, &group{ + owners: []string{owner}, + size: ep.InstalledSize, + tiebreaker: owner, + }) + } + + // Combine all groups and apply the shared budget. + apkGroups = append(apkGroups, ecoGroups...) + groups := applyBudget(apkGroups, bc.ic.Layering.Budget) log.Infof("Building %d layers with budget %d", len(groups), bc.ic.Layering.Budget) for i, g := range groups { @@ -82,6 +100,13 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) { for _, pkg := range g.pkgs { log.Infof(" - %s=%s", pkg.Name, pkg.Version) } + for _, owner := range g.owners { + // Ecosystem owners are namespaced with a colon (e.g. "python:flask"), + // APK owners are bare package names logged above via g.pkgs. + if strings.Contains(owner, ":") { + log.Infof(" - %s", owner) + } + } } // Then partition that single fs.FS into multiple layers based on our layering strategy. @@ -117,6 +142,16 @@ func replacesGroup(rep string, g *group) (bool, error) { } func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { + groups, err := groupAPKByOrigin(pkgs) + if err != nil { + return nil, err + } + return applyBudget(groups, budget), nil +} + +// groupAPKByOrigin groups APK packages by origin and merges replaces relationships. +// It populates both pkgs and owners on each group. Does not apply budget. +func groupAPKByOrigin(pkgs []*apk.Package) ([]*group, error) { // First, we're going to group packages by their origin. 
byOrigin := map[string]*group{} for _, pkg := range pkgs { @@ -131,6 +166,7 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { } g.pkgs = append(g.pkgs, pkg) + g.owners = append(g.owners, pkg.Name) } // Then we need to merge any packages that replace each other. @@ -189,9 +225,8 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { } } - // Now we need to pick the best groups to keep. - // First pass we'll set the size of each group to the sum of the installed size of all its packages. - groups := make([]*group, 0, budget) + // Compute sizes and deduplicate groups. + groups := make([]*group, 0) seen := map[*group]struct{}{} for v := range maps.Values(byOrigin) { if _, ok := seen[v]; ok { @@ -207,7 +242,14 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { } } - // Then we'll sort by the size and take the top $budget, merging the remainders. + return groups, nil +} + +// applyBudget sorts groups by size descending and merges anything beyond +// the budget into a single overflow group. It also sorts owners/packages +// within each group for consistency. +func applyBudget(groups []*group, budget int) []*group { + // Sort by the size and take the top $budget, merging the remainders. slices.SortFunc(groups, func(a, b *group) int { return cmp.Or( cmp.Compare(b.size, a.size), // Descending size. @@ -223,19 +265,27 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { groups = append(groups, merge(remainder...)) } - // Sort packages too just so they're in a consistent order. + // Sort packages and owners for consistent order. for _, g := range groups { slices.SortFunc(g.pkgs, func(a, b *apk.Package) int { return cmp.Compare(a.Name, b.Name) }) + slices.Sort(g.owners) } - return groups, nil + return groups } type group struct { + // pkgs holds APK packages in this group (used for installed DB splitting). 
pkgs []*apk.Package + // owners holds all owner names in this group. + // For APK packages this is the package name, for ecosystem packages + // this is the owner string (e.g. "python:flask"). + // Used by splitLayers to route files to the correct layer writer. + owners []string + size uint64 // This is silly but in the event that two groups have identical size, @@ -247,6 +297,7 @@ func merge(groups ...*group) *group { merged := &group{} for _, g := range groups { merged.pkgs = slices.Concat(merged.pkgs, g.pkgs) + merged.owners = slices.Concat(merged.owners, g.owners) merged.size += g.size merged.tiebreaker = max(merged.tiebreaker, g.tiebreaker) } @@ -256,8 +307,8 @@ func merge(groups ...*group) *group { func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToDiff map[*apk.Package][]byte, tmpdir string) ([]v1.Layer, error) { buf := make([]byte, 1<<20) - // We'll create a writer for each layer and a map to quickly access the writer given a package or group. - packageToWriter := map[string]*layerWriter{} + // We'll create a writer for each layer and a map to quickly access the writer given an owner name or group. + ownerToWriter := map[string]*layerWriter{} groupToWriter := map[*group]*layerWriter{} for _, g := range groups { @@ -270,8 +321,8 @@ func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToD w := newLayerWriter(f) groupToWriter[g] = w - for _, pkg := range g.pkgs { - packageToWriter[pkg.Name] = w + for _, owner := range g.owners { + ownerToWriter[owner] = w } } @@ -314,15 +365,17 @@ func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToD // By default, all files go into the top layer. w := top - // However, if a file implements an extension interface that tells us what package owns it, + // However, if a file implements an extension interface that tells us who owns it, // we can use that to determine which layer it belongs to (if any). 
- if pkger, ok := f.info.(interface { - Package() *apk.Package + // Owner() returns the APK package name for APK-installed files, or the + // ecosystem owner string (e.g. "python:flask") for ecosystem files. + if ownr, ok := f.info.(interface { + Owner() string }); ok { - if pkg := pkger.Package(); pkg != nil { - w, ok = packageToWriter[pkg.Name] + if name := ownr.Owner(); name != "" { + w, ok = ownerToWriter[name] if !ok { - panic(fmt.Errorf("packageToWriter[%q] missing", pkg.Name)) + panic(fmt.Errorf("ownerToWriter[%q] missing", name)) } } } diff --git a/pkg/build/layers_test.go b/pkg/build/layers_test.go index bd5d00243..228f62a70 100644 --- a/pkg/build/layers_test.go +++ b/pkg/build/layers_test.go @@ -25,6 +25,7 @@ import ( "chainguard.dev/apko/pkg/apk/apk" apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/tarfs" ) func size(pkgs ...*apk.Package) uint64 { @@ -261,8 +262,8 @@ func TestSplitLayersDirectoryCreation(t *testing.T) { // Create package groups (this will result in multiple layers) groups := []*group{ - {pkgs: []*apk.Package{pkg1}, size: 1000, tiebreaker: "pkg1"}, - {pkgs: []*apk.Package{pkg2}, size: 2000, tiebreaker: "pkg2"}, + {pkgs: []*apk.Package{pkg1}, owners: []string{"pkg1"}, size: 1000, tiebreaker: "pkg1"}, + {pkgs: []*apk.Package{pkg2}, owners: []string{"pkg2"}, size: 2000, tiebreaker: "pkg2"}, } // Create package diffs (minimal content for each package) @@ -348,3 +349,133 @@ func TestSplitLayersDirectoryCreation(t *testing.T) { } } } + +func TestApplyBudgetWithEcosystemGroups(t *testing.T) { + // Simulate APK groups and ecosystem groups competing for budget. 
+ apkGroup := &group{ + pkgs: []*apk.Package{{Name: "glibc", Origin: "glibc", InstalledSize: 6000000}}, + owners: []string{"glibc"}, + size: 6000000, + tiebreaker: "glibc", + } + ecoGroup1 := &group{ + owners: []string{"python:flask"}, + size: 500000, + tiebreaker: "python:flask", + } + ecoGroup2 := &group{ + owners: []string{"python:requests"}, + size: 300000, + tiebreaker: "python:requests", + } + + groups := applyBudget([]*group{apkGroup, ecoGroup1, ecoGroup2}, 3) + if len(groups) != 3 { + t.Fatalf("expected 3 groups, got %d", len(groups)) + } + // Should be sorted by size descending: glibc, flask, requests + if groups[0].owners[0] != "glibc" { + t.Errorf("expected glibc first, got %v", groups[0].owners) + } + if groups[1].owners[0] != "python:flask" { + t.Errorf("expected python:flask second, got %v", groups[1].owners) + } + if groups[2].owners[0] != "python:requests" { + t.Errorf("expected python:requests third, got %v", groups[2].owners) + } + + // With budget=2, the smallest should overflow into the last group. + groups = applyBudget([]*group{ + {owners: []string{"glibc"}, size: 6000000, tiebreaker: "glibc"}, + {owners: []string{"python:flask"}, size: 500000, tiebreaker: "python:flask"}, + {owners: []string{"python:requests"}, size: 300000, tiebreaker: "python:requests"}, + }, 2) + if len(groups) != 2 { + t.Fatalf("expected 2 groups, got %d", len(groups)) + } + if groups[0].owners[0] != "glibc" { + t.Errorf("expected glibc first, got %v", groups[0].owners) + } + // The overflow group should contain both ecosystem packages. + if len(groups[1].owners) != 2 { + t.Errorf("expected 2 owners in overflow, got %d: %v", len(groups[1].owners), groups[1].owners) + } +} + +func TestSplitLayersWithEcosystemOwners(t *testing.T) { + // Use tarfs which supports Owner() on file info. + fsys := tarfs.New() + + // Create some APK-like content (without actual package ownership). 
+ if err := fsys.MkdirAll("usr/lib/apk/db", 0755); err != nil { + t.Fatal(err) + } + if err := fsys.WriteFile("usr/lib/apk/db/installed", []byte(""), 0644); err != nil { + t.Fatal(err) + } + + // Simulate ecosystem package file installation with owner tagging. + fsys.SetCurrentOwner("python:flask") + if err := fsys.MkdirAll("usr/lib/python3.12/site-packages/flask", 0755); err != nil { + t.Fatal(err) + } + if err := fsys.WriteFile("usr/lib/python3.12/site-packages/flask/__init__.py", []byte("# flask init"), 0644); err != nil { + t.Fatal(err) + } + fsys.SetCurrentOwner("") + + fsys.SetCurrentOwner("python:requests") + if err := fsys.MkdirAll("usr/lib/python3.12/site-packages/requests", 0755); err != nil { + t.Fatal(err) + } + if err := fsys.WriteFile("usr/lib/python3.12/site-packages/requests/__init__.py", []byte("# requests init"), 0644); err != nil { + t.Fatal(err) + } + fsys.SetCurrentOwner("") + + // Create groups: one for each ecosystem package. + groups := []*group{ + {owners: []string{"python:flask"}, size: 100, tiebreaker: "python:flask"}, + {owners: []string{"python:requests"}, size: 200, tiebreaker: "python:requests"}, + } + + tmpDir := t.TempDir() + pkgToDiff := map[*apk.Package][]byte{} + + ctx := context.Background() + layers, err := splitLayers(ctx, fsys, groups, pkgToDiff, tmpDir) + if err != nil { + t.Fatalf("splitLayers failed: %v", err) + } + + // 2 ecosystem groups + 1 top layer = 3 layers + if len(layers) != 3 { + t.Fatalf("expected 3 layers, got %d", len(layers)) + } + + // Check that flask files ended up in layer 0 and requests in layer 1. 
+ for i, want := range []string{"flask", "requests"} { + rc, err := layers[i].Uncompressed() + if err != nil { + t.Fatal(err) + } + tr := tar.NewReader(rc) + found := false + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + if hdr.Name == fmt.Sprintf("usr/lib/python3.12/site-packages/%s/__init__.py", want) { + found = true + } + } + rc.Close() + if !found { + t.Errorf("layer %d missing %s/__init__.py", i, want) + } + } +} diff --git a/pkg/build/options.go b/pkg/build/options.go index 0e25edd0c..85a9a1d7d 100644 --- a/pkg/build/options.go +++ b/pkg/build/options.go @@ -266,3 +266,35 @@ func WithSizeLimits(limits options.SizeLimits) Option { return nil } } + +// WithExtraEcosystemPackages adds extra ecosystem packages to the build. +func WithExtraEcosystemPackages(ecosystem string, packages []string) Option { + return func(bc *Context) error { + if len(packages) == 0 { + return nil + } + if bc.ic.Contents.Ecosystems == nil { + bc.ic.Contents.Ecosystems = make(map[string]types.EcosystemConfig) + } + eco := bc.ic.Contents.Ecosystems[ecosystem] + eco.Packages = append(eco.Packages, packages...) + bc.ic.Contents.Ecosystems[ecosystem] = eco + return nil + } +} + +// WithExtraEcosystemIndexes adds extra ecosystem indexes to the build. +func WithExtraEcosystemIndexes(ecosystem string, indexes []string) Option { + return func(bc *Context) error { + if len(indexes) == 0 { + return nil + } + if bc.ic.Contents.Ecosystems == nil { + bc.ic.Contents.Ecosystems = make(map[string]types.EcosystemConfig) + } + eco := bc.ic.Contents.Ecosystems[ecosystem] + eco.Indexes = append(eco.Indexes, indexes...) 
+ bc.ic.Contents.Ecosystems[ecosystem] = eco + return nil + } +} diff --git a/pkg/build/types/image_configuration.go b/pkg/build/types/image_configuration.go index 245452a1f..04b9e6a8e 100644 --- a/pkg/build/types/image_configuration.go +++ b/pkg/build/types/image_configuration.go @@ -181,6 +181,27 @@ func (i *ImageContents) MergeInto(target *ImageContents) error { if target.BaseImage == nil { target.BaseImage = i.BaseImage } + // Merge ecosystem configs + if len(i.Ecosystems) > 0 { + if target.Ecosystems == nil { + target.Ecosystems = make(map[string]EcosystemConfig) + } + for name, eco := range i.Ecosystems { + if existing, ok := target.Ecosystems[name]; ok { + existing.Indexes = slices.Concat(eco.Indexes, existing.Indexes) + existing.Packages = slices.Concat(eco.Packages, existing.Packages) + if existing.PythonVersion == "" { + existing.PythonVersion = eco.PythonVersion + } + if existing.Venv == "" { + existing.Venv = eco.Venv + } + target.Ecosystems[name] = existing + } else { + target.Ecosystems[name] = eco + } + } + } return nil } @@ -295,6 +316,14 @@ func (ic *ImageConfiguration) Summarize(ctx context.Context) { log.Infof(" - gid=%d(%s) members=%v", g.GID, g.GroupName, g.Members) } } + if len(ic.Contents.Ecosystems) > 0 { + log.Infof(" ecosystems:") + for name, eco := range ic.Contents.Ecosystems { + log.Infof(" %s:", name) + log.Infof(" indexes: %v", eco.Indexes) + log.Infof(" packages: %v", eco.Packages) + } + } if len(ic.Annotations) > 0 { log.Infof(" annotations:") for k, v := range ic.Annotations { diff --git a/pkg/build/types/schema.json b/pkg/build/types/schema.json index 071b0c7ce..cdbede06d 100644 --- a/pkg/build/types/schema.json +++ b/pkg/build/types/schema.json @@ -31,6 +31,35 @@ "additionalProperties": false, "type": "object" }, + "EcosystemConfig": { + "properties": { + "indexes": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Indexes is a list of package index URLs (e.g., PyPI simple API URLs)." 
+ }, + "packages": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Packages is a list of package specifications (e.g., \"flask==3.0.0\")." + }, + "python_version": { + "type": "string", + "description": "PythonVersion overrides auto-detection of the Python version (e.g., \"3.12\")." + }, + "venv": { + "type": "string", + "description": "Venv is an optional path for a virtual environment (e.g., \"/app/venv\").\nWhen set, packages are installed into the venv instead of the system site-packages,\nand VIRTUAL_ENV / PATH are set automatically." + } + }, + "additionalProperties": false, + "type": "object", + "description": "EcosystemConfig holds configuration for a non-APK package ecosystem (e.g., python)." + }, "Group": { "properties": { "groupname": { @@ -217,6 +246,13 @@ "baseimage": { "$ref": "#/$defs/BaseImageDescriptor", "description": "Optional: Base image to build on top of. Warning: Experimental." + }, + "ecosystems": { + "additionalProperties": { + "$ref": "#/$defs/EcosystemConfig" + }, + "type": "object", + "description": "Optional: Non-APK ecosystem packages to install (e.g., pip packages)." } }, "additionalProperties": false, diff --git a/pkg/build/types/types.go b/pkg/build/types/types.go index e920acc39..357512c7a 100644 --- a/pkg/build/types/types.go +++ b/pkg/build/types/types.go @@ -104,6 +104,20 @@ type BaseImageDescriptor struct { APKIndex string `json:"apkindex,omitempty" yaml:"apkindex,omitempty"` } +// EcosystemConfig holds configuration for a non-APK package ecosystem (e.g., python). +type EcosystemConfig struct { + // Indexes is a list of package index URLs (e.g., PyPI simple API URLs). + Indexes []string `json:"indexes,omitempty" yaml:"indexes,omitempty"` + // Packages is a list of package specifications (e.g., "flask==3.0.0"). + Packages []string `json:"packages,omitempty" yaml:"packages,omitempty"` + // PythonVersion overrides auto-detection of the Python version (e.g., "3.12"). 
+	PythonVersion string `json:"python_version,omitempty" yaml:"python_version,omitempty"`
+	// Venv is an optional path for a virtual environment (e.g., "/app/venv").
+	// When set, packages are installed into the venv instead of the system site-packages,
+	// and VIRTUAL_ENV / PATH are set automatically.
+	Venv string `json:"venv,omitempty" yaml:"venv,omitempty"`
+}
+
 type ImageContents struct {
 	// A list of apk repositories to use for pulling packages at build time,
 	// which are not installed into /etc/apk/repositories in the image (to
@@ -122,6 +136,8 @@ type ImageContents struct {
 	Packages []string `json:"packages,omitempty" yaml:"packages,omitempty"`
 	// Optional: Base image to build on top of. Warning: Experimental.
 	BaseImage *BaseImageDescriptor `json:"baseimage,omitempty" yaml:"baseimage,omitempty" apko:"experimental"`
+	// Optional: Non-APK ecosystem packages to install (e.g., pip packages).
+	Ecosystems map[string]EcosystemConfig `json:"ecosystems,omitempty" yaml:"ecosystems,omitempty"`
 }
 
 // MarshalYAML implements yaml.Marshaler for ImageContents, redacting URLs in
@@ -138,6 +154,13 @@ func (i ImageContents) MarshalYAML() (any, error) {
 		return nil, err
 	}
 
+	for name, eco := range ri.Ecosystems {
+		if err := processRepositoryURLs(eco.Indexes); err != nil {
+			return nil, err
+		}
+		ri.Ecosystems[name] = eco
+	}
+
 	for idx, key := range ri.Keyring {
 		rawURL := key
 		parsed, err := url.Parse(rawURL)
diff --git a/pkg/ecosystem/ecosystem.go b/pkg/ecosystem/ecosystem.go
new file mode 100644
index 000000000..f16c772fa
--- /dev/null
+++ b/pkg/ecosystem/ecosystem.go
@@ -0,0 +1,172 @@
+// Copyright 2024 Chainguard, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package ecosystem
+
+import (
+	"context"
+	"fmt"
+	"maps"
+	"slices"
+	"strings"
+	"sync"
+
+	"chainguard.dev/apko/pkg/apk/auth"
+	apkfs "chainguard.dev/apko/pkg/apk/fs"
+	"chainguard.dev/apko/pkg/build/types"
+)
+
+// ResolvedPackage represents a package that has been resolved to a specific
+// version and download URL.
+type ResolvedPackage struct {
+	Ecosystem     string
+	Name          string
+	Version       string
+	URL           string
+	Checksum      string // "sha256:"
+	SignatureURL  string // optional: signature bundle URL (from data-signature)
+	ProvenanceURL string // optional: provenance data URL (from data-provenance)
+
+	// InstalledSize is populated after installation with the approximate
+	// bytes written for this package. Used for layering budget decisions.
+	InstalledSize uint64
+}
+
+// OwnerName returns the namespaced owner string used for filesystem tagging
+// and layer routing (e.g. "python:flask"). The colon ensures no collision
+// with APK package names.
+func (rp ResolvedPackage) OwnerName() string {
+	return rp.Ecosystem + ":" + rp.Name
+}
+
+// Installer is the interface that ecosystem package installers must implement.
+type Installer interface {
+	// Name returns the ecosystem name (e.g., "python").
+	Name() string
+	// Resolve resolves the requested packages to specific versions and URLs.
+	// libc is "musl" or "glibc", detected from the image filesystem.
+	Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture, libc string, a auth.Authenticator) ([]ResolvedPackage, error)
+	// Install extracts resolved packages into the filesystem.
+	// Returns environment variables that should be set in the image configuration.
+	Install(ctx context.Context, fs apkfs.FullFS, packages []ResolvedPackage, config types.EcosystemConfig, a auth.Authenticator) (map[string]string, error)
+}
+
+// RequiredAPKPackagesFunc returns APK packages that an ecosystem requires.
+type RequiredAPKPackagesFunc func(config types.EcosystemConfig) []string
+
+var (
+	registryMu   sync.RWMutex
+	registry     = map[string]func() Installer{}
+	apkPkgsFuncs = map[string]RequiredAPKPackagesFunc{}
+)
+
+// Register registers an ecosystem installer factory.
+func Register(name string, factory func() Installer) {
+	registryMu.Lock()
+	defer registryMu.Unlock()
+	registry[name] = factory
+}
+
+// RegisterRequiredAPKPackages registers a function that returns APK packages
+// required by the named ecosystem.
+func RegisterRequiredAPKPackages(name string, fn RequiredAPKPackagesFunc) {
+	registryMu.Lock()
+	defer registryMu.Unlock()
+	apkPkgsFuncs[name] = fn
+}
+
+// RequiredPackages returns APK packages required by all configured ecosystems.
+// These should be injected into ImageContents.Packages before resolution.
+// Ecosystems are visited in sorted name order so the result is deterministic.
+func RequiredPackages(ecosystems map[string]types.EcosystemConfig) []string {
+	registryMu.RLock()
+	defer registryMu.RUnlock()
+	var pkgs []string
+	for _, name := range slices.Sorted(maps.Keys(ecosystems)) {
+		if fn, ok := apkPkgsFuncs[name]; ok {
+			pkgs = append(pkgs, fn(ecosystems[name])...)
+		}
+	}
+	return pkgs
+}
+
+// Get returns an installer for the named ecosystem.
+func Get(name string) (Installer, bool) {
+	registryMu.RLock()
+	defer registryMu.RUnlock()
+	factory, ok := registry[name]
+	if !ok {
+		return nil, false
+	}
+	return factory(), true
+}
+
+// detectLibc checks /etc/os-release to determine the image's libc.
+// Alpine (ID=alpine, optionally quoted) uses musl; everything else uses glibc.
+func detectLibc(fs apkfs.FullFS) string {
+	data, err := fs.ReadFile("etc/os-release")
+	if err != nil {
+		return "glibc"
+	}
+	for line := range strings.SplitSeq(string(data), "\n") {
+		if id, ok := strings.CutPrefix(strings.TrimSpace(line), "ID="); ok && strings.Trim(id, "\"'") == "alpine" {
+			return "musl"
+		}
+	}
+	return "glibc"
+}
+
+// OwnerTagger is implemented by filesystems that support tagging files
+// with an owner name for layering purposes.
+type OwnerTagger interface {
+	SetCurrentOwner(owner string)
+	OwnerSize(owner string) uint64
+}
+
+// InstallAll resolves and installs packages for every configured ecosystem,
+// returning the merged environment variables and the resolved packages.
+// NOTE(review): ecosystems is a map, so processing order (and which value wins
+// when two installers set the same variable) is not deterministic; confirm.
+// NOTE(review): InstalledSize is not set here; presumably Install or an
+// OwnerTagger-aware filesystem fills it in. Verify against the installers.
+func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture, a auth.Authenticator) (map[string]string, []ResolvedPackage, error) {
+	env := map[string]string{}
+	var installed []ResolvedPackage
+
+	libc := detectLibc(fs)
+
+	for name, config := range ecosystems {
+		installer, ok := Get(name)
+		if !ok {
+			return nil, nil, fmt.Errorf("unknown ecosystem: %s", name)
+		}
+		resolved, err := installer.Resolve(ctx, config, arch, libc, a)
+		if err != nil {
+			return nil, nil, fmt.Errorf("resolving %s packages: %w", name, err)
+		}
+		if len(resolved) == 0 {
+			continue
+		}
+
+		vars, err := installer.Install(ctx, fs, resolved, config, a)
+		if err != nil {
+			return nil, nil, fmt.Errorf("installing %s packages: %w", name, err)
+		}
+
+		installed = append(installed, resolved...)
+
+		maps.Copy(env, vars)
+	}
+	return env, installed, nil
+}
diff --git a/pkg/ecosystem/python/platform.go b/pkg/ecosystem/python/platform.go
new file mode 100644
index 000000000..d423c7014
--- /dev/null
+++ b/pkg/ecosystem/python/platform.go
@@ -0,0 +1,212 @@
+// Copyright 2024 Chainguard, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package python
+
+import (
+	"fmt"
+	"strconv"
+	"strings"
+
+	"chainguard.dev/apko/pkg/build/types"
+)
+
+// archToMachine maps OCI architecture strings to the Python/Linux machine
+// string used in wheel platform tags.
+var archToMachine = map[types.Architecture]string{
+	types.ParseArchitecture("amd64"):   "x86_64",
+	types.ParseArchitecture("arm64"):   "aarch64",
+	types.ParseArchitecture("arm/v7"):  "armv7l",
+	types.ParseArchitecture("arm/v6"):  "armv6l",
+	types.ParseArchitecture("386"):     "i686",
+	types.ParseArchitecture("ppc64le"): "ppc64le",
+	types.ParseArchitecture("s390x"):   "s390x",
+	types.ParseArchitecture("riscv64"): "riscv64",
+	types.ParseArchitecture("loong64"): "loongarch64",
+}
+
+// isLinuxPlatformTag checks whether a single platform tag (e.g.
+// "musllinux_1_2_x86_64") targets the given machine architecture and
+// is compatible with the image's libc. musl images only accept musllinux
+// wheels; glibc images only accept manylinux wheels.
+func isLinuxPlatformTag(tag, machine string, libc string) bool {
+	if !strings.HasSuffix(tag, "_"+machine) {
+		return false
+	}
+	if tag == "linux_"+machine {
+		return true
+	}
+	if libc == "musl" {
+		return strings.HasPrefix(tag, "musllinux_")
+	}
+	return strings.HasPrefix(tag, "manylinux")
+}
+
+// isBinaryWheel returns true if the wheel targets a specific platform
+// (not pure-python "any").
+func isBinaryWheel(w wheelFileParts) bool {
+	return w.PlatformTag != "any"
+}
+
+// wheelFileParts holds the parsed components of a wheel filename per PEP 427.
+// Format: {distribution}-{version}(-{build tag})?-{python tag}-{abi tag}-{platform tag}.whl
+type wheelFileParts struct {
+	Distribution string
+	Version      string
+	BuildTag     string
+	PythonTag    string
+	ABITag       string
+	PlatformTag  string
+}
+
+// parseWheelFilename parses a wheel filename per PEP 427.
+// It returns an error when the name lacks the ".whl" suffix or does not split
+// into five (no build tag) or six (with build tag) dash-separated fields.
+func parseWheelFilename(filename string) (wheelFileParts, error) {
+	name := strings.TrimSuffix(filename, ".whl")
+	if name == filename {
+		return wheelFileParts{}, fmt.Errorf("not a wheel file: %s", filename)
+	}
+
+	parts := strings.Split(name, "-")
+	switch len(parts) {
+	case 5:
+		return wheelFileParts{
+			Distribution: parts[0],
+			Version:      parts[1],
+			PythonTag:    parts[2],
+			ABITag:       parts[3],
+			PlatformTag:  parts[4],
+		}, nil
+	case 6:
+		return wheelFileParts{
+			Distribution: parts[0],
+			Version:      parts[1],
+			BuildTag:     parts[2],
+			PythonTag:    parts[3],
+			ABITag:       parts[4],
+			PlatformTag:  parts[5],
+		}, nil
+	default:
+		return wheelFileParts{}, fmt.Errorf("invalid wheel filename: %s", filename)
+	}
+}
+
+// isCompatibleWheel checks whether a wheel file is compatible with the given
+// Python version, architecture, and libc.
+func isCompatibleWheel(w wheelFileParts, pythonVersion string, arch types.Architecture, libc string) bool {
+	// The interpreter tag must match directly, except for stable-ABI wheels:
+	// those carry the oldest CPython they support (e.g. cp39-abi3) and remain
+	// installable on every later CPython 3 release.
+	pythonOK := isCompatiblePythonTag(w.PythonTag, pythonVersion) ||
+		(hasABI3(w.ABITag) && cpythonTagAtMost(w.PythonTag, pythonVersion))
+	if !pythonOK {
+		return false
+	}
+
+	// Check ABI compatibility
+	if !isCompatibleABI(w.ABITag, pythonVersion) {
+		return false
+	}
+
+	// Check platform compatibility
+	return isCompatiblePlatform(w.PlatformTag, arch, libc)
+}
+
+// isCompatiblePythonTag checks if the wheel's python tag is compatible.
+// The tag may be a compressed set such as "py2.py3"; it is compatible when any
+// component is "py3" or the exact CPython tag for pythonVersion (e.g. "cp312").
+func isCompatiblePythonTag(tag, pythonVersion string) bool {
+	cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "")
+	for t := range strings.SplitSeq(tag, ".") {
+		// Components never contain ".", so only single tags are compared here.
+		if t == "py3" || t == cpTag {
+			return true
+		}
+	}
+	return false
+}
+
+// hasABI3 reports whether a (possibly compressed) ABI tag includes "abi3".
+func hasABI3(abiTag string) bool {
+	for t := range strings.SplitSeq(abiTag, ".") {
+		if t == "abi3" {
+			return true
+		}
+	}
+	return false
+}
+
+// cpythonTagAtMost reports whether any component of a python tag is a
+// CPython 3 tag ("cp3N") whose minor version N does not exceed the minor
+// version of pythonVersion ("3.M" or "3.M.P").
+func cpythonTagAtMost(tag, pythonVersion string) bool {
+	rest, ok := strings.CutPrefix(pythonVersion, "3.")
+	if !ok {
+		return false
+	}
+	minorStr, _, _ := strings.Cut(rest, ".")
+	target, err := strconv.Atoi(minorStr)
+	if err != nil {
+		return false
+	}
+	for t := range strings.SplitSeq(tag, ".") {
+		digits, found := strings.CutPrefix(t, "cp3")
+		if !found {
+			continue
+		}
+		if minor, convErr := strconv.Atoi(digits); convErr == nil && minor <= target {
+			return true
+		}
+	}
+	return false
+}
+
+// isCompatibleABI checks if the wheel's ABI tag is compatible.
+func isCompatibleABI(tag, pythonVersion string) bool {
+	if tag == "none" {
+		return true
+	}
+	cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "")
+	for t := range strings.SplitSeq(tag, ".") {
+		if t == "abi3" || t == cpTag {
+			return true
+		}
+	}
+	return false
+}
+
+// isCompatiblePlatform checks if the wheel's platform tag is compatible
+// with the given architecture and libc, without version limits.
+func isCompatiblePlatform(tag string, arch types.Architecture, libc string) bool {
+	if tag == "any" {
+		return true
+	}
+	machine, ok := archToMachine[arch]
+	if !ok {
+		return false
+	}
+	for t := range strings.SplitSeq(tag, ".") {
+		if isLinuxPlatformTag(t, machine, libc) {
+			return true
+		}
+	}
+	return false
+}
+
+// isBetterWheel returns true if candidate is a better choice than current.
+// Prefers binary wheels over pure-python.
+func isBetterWheel(current, candidate wheelFileParts) bool {
+	return !isBinaryWheel(current) && isBinaryWheel(candidate)
+}
diff --git a/pkg/ecosystem/python/platform_test.go b/pkg/ecosystem/python/platform_test.go
new file mode 100644
index 000000000..466d92452
--- /dev/null
+++ b/pkg/ecosystem/python/platform_test.go
@@ -0,0 +1,226 @@
+// Copyright 2024 Chainguard, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package python

import (
	"testing"

	"chainguard.dev/apko/pkg/build/types"
)

// TestIsLinuxPlatformTag checks the libc and machine filtering applied to a
// single platform tag.
func TestIsLinuxPlatformTag(t *testing.T) {
	tests := []struct {
		tag, machine string
		libc         string
		want         bool
	}{
		// musl accepts musllinux, rejects manylinux
		{"musllinux_1_2_x86_64", "x86_64", "musl", true},
		{"manylinux_2_17_x86_64", "x86_64", "musl", false},
		// glibc accepts manylinux, rejects musllinux
		{"manylinux_2_17_x86_64", "x86_64", "glibc", true},
		{"manylinux_2_99_x86_64", "x86_64", "glibc", true}, // no version ceiling
		{"manylinux2014_x86_64", "x86_64", "glibc", true},  // legacy alias
		{"manylinux1_i686", "i686", "glibc", true},         // legacy alias
		{"musllinux_1_2_x86_64", "x86_64", "glibc", false},
		// linux_ fallback works for both
		{"linux_x86_64", "x86_64", "musl", true},
		{"linux_x86_64", "x86_64", "glibc", true},
		// wrong machine or non-linux
		{"musllinux_1_2_aarch64", "x86_64", "musl", false},
		{"macosx_10_9_x86_64", "x86_64", "glibc", false},
		{"any", "x86_64", "glibc", false},
	}
	for _, tt := range tests {
		t.Run(tt.tag, func(t *testing.T) {
			if got := isLinuxPlatformTag(tt.tag, tt.machine, tt.libc); got != tt.want {
				t.Errorf("isLinuxPlatformTag(%q, %q, %v) = %v, want %v", tt.tag, tt.machine, tt.libc, got, tt.want)
			}
		})
	}
}

// TestIsBetterWheel checks that only a pure -> binary switch counts as an
// improvement.
func TestIsBetterWheel(t *testing.T) {
	pure := wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"}
	binary := wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"}

	if !isBetterWheel(pure, binary) {
		t.Error("binary wheel should be better than pure python")
	}
	if isBetterWheel(binary, pure) {
		t.Error("pure python should not be better than binary")
	}
	if isBetterWheel(binary, binary) {
		t.Error("identical wheels should not be better")
	}
}

// TestArchToMachine guards against an architecture being dropped from the
// archToMachine table.
func TestArchToMachine(t *testing.T) {
	// All standard architectures should have a mapping.
	for _, arch := range []string{"amd64", "arm64", "arm/v7", "arm/v6", "386", "ppc64le", "s390x", "riscv64", "loong64"} {
		if _, ok := archToMachine[types.ParseArchitecture(arch)]; !ok {
			t.Errorf("archToMachine missing %q", arch)
		}
	}
}

// TestParseWheelFilename covers a pure wheel, a binary wheel, a non-wheel
// file, and a ".whl" name with too few fields.
func TestParseWheelFilename(t *testing.T) {
	tests := []struct {
		filename string
		wantDist string
		wantVer  string
		wantPy   string
		wantABI  string
		wantPlat string
		wantErr  bool
	}{
		{
			filename: "Flask-3.0.0-py3-none-any.whl",
			wantDist: "Flask",
			wantVer:  "3.0.0",
			wantPy:   "py3",
			wantABI:  "none",
			wantPlat: "any",
		},
		{
			filename: "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.whl",
			wantDist: "numpy",
			wantVer:  "1.26.0",
			wantPy:   "cp312",
			wantABI:  "cp312",
			wantPlat: "manylinux_2_17_x86_64",
		},
		{
			filename: "notawheel.tar.gz",
			wantErr:  true,
		},
		{
			filename: "bad-name.whl",
			wantErr:  true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.filename, func(t *testing.T) {
			parts, err := parseWheelFilename(tt.filename)
			if tt.wantErr {
				if err == nil {
					t.Error("expected error, got nil")
				}
				return
			}
			if err != nil {
				t.Fatalf("unexpected error: %v", err)
			}
			if parts.Distribution != tt.wantDist {
				t.Errorf("Distribution = %q, want %q", parts.Distribution, tt.wantDist)
			}
			if parts.Version != tt.wantVer {
				t.Errorf("Version = %q, want %q", parts.Version, tt.wantVer)
			}
			if parts.PythonTag != tt.wantPy {
				t.Errorf("PythonTag = %q, want %q", parts.PythonTag, tt.wantPy)
			}
			if parts.ABITag != tt.wantABI {
				t.Errorf("ABITag = %q, want %q", parts.ABITag, tt.wantABI)
			}
			if parts.PlatformTag != tt.wantPlat {
				t.Errorf("PlatformTag = %q, want %q", parts.PlatformTag, tt.wantPlat)
			}
		})
	}
}

// TestIsCompatibleWheel exercises the combined python/ABI/platform check for
// Python 3.12 on amd64 under both libc flavours.
func TestIsCompatibleWheel(t *testing.T) {
	tests := []struct {
		name  string
		wheel wheelFileParts
		pyVer string
		arch  string
		libc  string
		want  bool
	}{
		{
			name:  "pure python wheel on glibc",
			wheel: wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"},
			pyVer: "3.12", arch: "amd64", libc: "glibc", want: true,
		},
		{
			name:  "pure python wheel on musl",
			wheel: wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"},
			pyVer: "3.12", arch: "amd64", libc: "musl", want: true,
		},
		{
			name:  "manylinux on glibc",
			wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"},
			pyVer: "3.12", arch: "amd64", libc: "glibc", want: true,
		},
		{
			name:  "manylinux on musl is rejected",
			wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"},
			pyVer: "3.12", arch: "amd64", libc: "musl", want: false,
		},
		{
			name:  "musllinux on musl",
			wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "musllinux_1_2_x86_64"},
			pyVer: "3.12", arch: "amd64", libc: "musl", want: true,
		},
		{
			name:  "musllinux on glibc is rejected",
			wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "musllinux_1_2_x86_64"},
			pyVer: "3.12", arch: "amd64", libc: "glibc", want: false,
		},
		{
			name:  "wrong arch",
			wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_aarch64"},
			pyVer: "3.12", arch: "amd64", libc: "glibc", want: false,
		},
		{
			name:  "wrong python version",
			wheel: wheelFileParts{PythonTag: "cp311", ABITag: "cp311", PlatformTag: "any"},
			pyVer: "3.12", arch: "amd64", libc: "glibc", want: false,
		},
		{
			name:  "abi3 on glibc",
			wheel: wheelFileParts{PythonTag: "cp312", ABITag: "abi3", PlatformTag: "manylinux_2_17_x86_64"},
			pyVer: "3.12", arch: "amd64", libc: "glibc", want: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got := isCompatibleWheel(tt.wheel, tt.pyVer, types.ParseArchitecture(tt.arch), tt.libc)
			if got != tt.want {
				t.Errorf("isCompatibleWheel() = %v, want %v", got, tt.want)
			}
		})
	}
}

// TestWheelSelection ties compatibility and preference together for the
// amd64/glibc case.
func TestWheelSelection(t *testing.T) {
	// When both pure and binary wheels are compatible, binary wins.
	pure := wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"}
	binary := wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"}

	if !isCompatibleWheel(pure, "3.12", types.ParseArchitecture("amd64"), "glibc") {
		t.Fatal("pure wheel should be compatible")
	}
	if !isCompatibleWheel(binary, "3.12", types.ParseArchitecture("amd64"), "glibc") {
		t.Fatal("binary wheel should be compatible on glibc")
	}
	if !isBetterWheel(pure, binary) {
		t.Error("binary should be preferred over pure")
	}
}
diff --git a/pkg/ecosystem/python/python.go b/pkg/ecosystem/python/python.go new file mode 100644 index 000000000..2c5451c8b --- /dev/null +++ b/pkg/ecosystem/python/python.go @@ -0,0 +1,240 @@
// Copyright 2024 Chainguard, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
+ +package python + +import ( + "context" + "fmt" + "io" + "net/http" + "path/filepath" + "strings" + + "github.com/chainguard-dev/clog" + + "chainguard.dev/apko/pkg/apk/auth" + apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" +) + +func init() { + ecosystem.Register("python", func() ecosystem.Installer { + return &installer{} + }) + ecosystem.RegisterRequiredAPKPackages("python", RequiredAPKPackages) +} + +// RequiredAPKPackages returns the APK packages needed for the configured +// Python version. When python_version is set, it injects both the base +// interpreter and the full python package so users don't need to list them +// manually in contents.packages. +func RequiredAPKPackages(config types.EcosystemConfig) []string { + if config.PythonVersion == "" { + return nil + } + return []string{ + "python-" + config.PythonVersion + "-base", + "python-" + config.PythonVersion, + } +} + +type installer struct{} + +func (i *installer) Name() string { return "python" } + +func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture, libc string, a auth.Authenticator) ([]ecosystem.ResolvedPackage, error) { + if len(config.Packages) == 0 { + return nil, nil + } + + specs := make([]packageSpec, 0, len(config.Packages)) + for _, pkg := range config.Packages { + specs = append(specs, parsePackageSpec(pkg)) + } + + indexes := config.Indexes + if len(indexes) == 0 { + indexes = []string{defaultIndex} + } + + pythonVersion := config.PythonVersion + if pythonVersion == "" { + return nil, fmt.Errorf("python_version is required in ecosystem python config") + } + + return resolvePackages(ctx, specs, indexes, pythonVersion, arch, libc, a) +} + +func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []ecosystem.ResolvedPackage, config types.EcosystemConfig, a auth.Authenticator) (map[string]string, error) { + log := clog.FromContext(ctx) + + 
pythonVersion := detectPythonVersion(fsys) + if pythonVersion == "" { + return nil, fmt.Errorf("no Python installation found in filesystem; install python3 via APK first") + } + log.Infof("detected Python %s for python ecosystem install", pythonVersion) + + var sitePackagesPath string + if config.Venv != "" { + venvPath := strings.TrimPrefix(config.Venv, "/") + if err := createVenv(fsys, venvPath, pythonVersion); err != nil { + return nil, fmt.Errorf("creating virtual environment at %s: %w", config.Venv, err) + } + sitePackagesPath = filepath.Join(venvPath, "lib", "python"+pythonVersion, "site-packages") + log.Infof("using virtual environment at %s", config.Venv) + } else { + sitePackagesPath = fmt.Sprintf("usr/lib/python%s/site-packages", pythonVersion) + } + + if err := fsys.MkdirAll(sitePackagesPath, 0755); err != nil { + return nil, fmt.Errorf("creating site-packages directory: %w", err) + } + + tagger, _ := fsys.(ecosystem.OwnerTagger) + + for idx, pkg := range packages { + log.Infof("installing python package %s==%s", pkg.Name, pkg.Version) + + if tagger != nil { + tagger.SetCurrentOwner(pkg.OwnerName()) + } + + data, err := downloadWheel(ctx, pkg.URL, a) + if err != nil { + return nil, fmt.Errorf("downloading %s: %w", pkg.Name, err) + } + + if err := verifyChecksum(data, pkg.Checksum); err != nil { + return nil, fmt.Errorf("verifying %s: %w", pkg.Name, err) + } + + if err := extractWheel(fsys, data, sitePackagesPath); err != nil { + return nil, fmt.Errorf("extracting %s: %w", pkg.Name, err) + } + + if err := writeInstallerFile(fsys, sitePackagesPath, data); err != nil { + log.Debugf("could not write INSTALLER file for %s: %v", pkg.Name, err) + } + + if isChainguardSource(pkg.URL) { + if err := writePackageSBOM(fsys, sitePackagesPath, data, pkg); err != nil { + log.Debugf("could not write SBOM for %s: %v", pkg.Name, err) + } + } + + if tagger != nil { + tagger.SetCurrentOwner("") + packages[idx].InstalledSize = tagger.OwnerSize(pkg.OwnerName()) + } + } + + // 
When using a venv, set VIRTUAL_ENV and prepend its bin/ to PATH. + if config.Venv != "" { + venvBin := filepath.Join(config.Venv, "bin") + return map[string]string{ + "VIRTUAL_ENV": config.Venv, + "PATH": venvBin + ":/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + }, nil + } + + return nil, nil +} + +// createVenv sets up a virtual environment directory structure. +func createVenv(fsys apkfs.FullFS, venvPath, pythonVersion string) error { + // Create directory structure + dirs := []string{ + filepath.Join(venvPath, "bin"), + filepath.Join(venvPath, "include"), + filepath.Join(venvPath, "lib", "python"+pythonVersion, "site-packages"), + } + for _, dir := range dirs { + if err := fsys.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("creating %s: %w", dir, err) + } + } + + // Write pyvenv.cfg + cfg := fmt.Sprintf( + "home = /usr/bin\ninclude-system-site-packages = false\nversion = %s\n", + pythonVersion, + ) + cfgPath := filepath.Join(venvPath, "pyvenv.cfg") + if err := fsys.WriteFile(cfgPath, []byte(cfg), 0644); err != nil { + return fmt.Errorf("writing pyvenv.cfg: %w", err) + } + + // Create symlinks in bin/ + pythonBin := "/usr/bin/python" + pythonVersion + binPath := filepath.Join(venvPath, "bin") + symlinks := map[string]string{ + "python": pythonBin, + "python3": pythonBin, + "python" + pythonVersion: pythonBin, + } + for name, target := range symlinks { + linkPath := filepath.Join(binPath, name) + if err := fsys.Symlink(target, linkPath); err != nil { + return fmt.Errorf("creating symlink %s: %w", linkPath, err) + } + } + + return nil +} + +// detectPythonVersion scans the filesystem for a Python installation and +// returns the version string (e.g., "3.12"). 
+func detectPythonVersion(fsys apkfs.FullFS) string { + entries, err := fsys.ReadDir("usr/lib") + if err != nil { + return "" + } + + for _, entry := range entries { + name := entry.Name() + if strings.HasPrefix(name, "python3.") && entry.IsDir() { + return strings.TrimPrefix(name, "python") + } + } + + return "" +} + +// downloadWheel downloads a wheel file from the given URL. +func downloadWheel(ctx context.Context, url string, a auth.Authenticator) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + if a != nil { + if err := a.AddAuth(ctx, req); err != nil { + return nil, fmt.Errorf("adding auth for %s: %w", url, err) + } + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d downloading %s", resp.StatusCode, url) + } + + return io.ReadAll(resp.Body) +} diff --git a/pkg/ecosystem/python/python_test.go b/pkg/ecosystem/python/python_test.go new file mode 100644 index 000000000..386f0b0ab --- /dev/null +++ b/pkg/ecosystem/python/python_test.go @@ -0,0 +1,117 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

package python

import (
	"strings"
	"testing"

	apkfs "chainguard.dev/apko/pkg/apk/fs"
	"chainguard.dev/apko/pkg/ecosystem"
)

// TestCreateVenv builds a venv in an in-memory filesystem and checks the
// pyvenv.cfg contents, the directory layout, and the interpreter symlinks.
func TestCreateVenv(t *testing.T) {
	fs := apkfs.NewMemFS()
	if err := fs.MkdirAll("usr/bin", 0755); err != nil {
		t.Fatal(err)
	}

	err := createVenv(fs, "app/venv", "3.12")
	if err != nil {
		t.Fatalf("createVenv() error: %v", err)
	}

	// Check pyvenv.cfg
	data, err := fs.ReadFile("app/venv/pyvenv.cfg")
	if err != nil {
		t.Fatalf("reading pyvenv.cfg: %v", err)
	}
	cfg := string(data)
	if !strings.Contains(cfg, "home = /usr/bin") {
		t.Errorf("pyvenv.cfg missing home, got: %q", cfg)
	}
	if !strings.Contains(cfg, "version = 3.12") {
		t.Errorf("pyvenv.cfg missing version, got: %q", cfg)
	}

	// Check directories exist
	for _, dir := range []string{
		"app/venv/bin",
		"app/venv/include",
		"app/venv/lib/python3.12/site-packages",
	} {
		if _, err := fs.Stat(dir); err != nil {
			t.Errorf("directory %s should exist: %v", dir, err)
		}
	}

	// Check symlinks
	for _, name := range []string{"python", "python3", "python3.12"} {
		target, err := fs.Readlink("app/venv/bin/" + name)
		if err != nil {
			t.Errorf("symlink %s should exist: %v", name, err)
			continue
		}
		if target != "/usr/bin/python3.12" {
			t.Errorf("symlink %s = %q, want %q", name, target, "/usr/bin/python3.12")
		}
	}
}

// TestInstallerRegistration checks that importing this package registers the
// "python" installer.
func TestInstallerRegistration(t *testing.T) {
	inst, ok := ecosystem.Get("python")
	if !ok {
		t.Fatal("python installer not registered")
	}
	if inst.Name() != "python" {
		t.Errorf("Name() = %q, want %q", inst.Name(), "python")
	}
}

// TestDetectPythonVersion covers the empty filesystem and a single
// usr/lib/python3.12 directory.
func TestDetectPythonVersion(t *testing.T) {
	fs := apkfs.NewMemFS()

	// No python installed
	if v := detectPythonVersion(fs); v != "" {
		t.Errorf("detectPythonVersion() = %q on empty fs, want empty", v)
	}

	// Create python directory
	if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil {
		t.Fatal(err)
	}

	v := detectPythonVersion(fs)
	if v != "3.12" {
		t.Errorf("detectPythonVersion() = %q, want %q", v, "3.12")
	}
}

// TestDetectPythonVersionMultiple only requires that one of the installed
// versions is returned; which one is left unspecified.
func TestDetectPythonVersionMultiple(t *testing.T) {
	fs := apkfs.NewMemFS()

	// Create multiple python versions - should return whichever is found first
	if err := fs.MkdirAll("usr/lib/python3.11/site-packages", 0755); err != nil {
		t.Fatal(err)
	}
	if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil {
		t.Fatal(err)
	}

	v := detectPythonVersion(fs)
	if v != "3.11" && v != "3.12" {
		t.Errorf("detectPythonVersion() = %q, want 3.11 or 3.12", v)
	}
}
diff --git a/pkg/ecosystem/python/resolve.go b/pkg/ecosystem/python/resolve.go new file mode 100644 index 000000000..d00d9b1e6 --- /dev/null +++ b/pkg/ecosystem/python/resolve.go @@ -0,0 +1,740 @@
// Copyright 2024 Chainguard, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package python

import (
	"archive/zip"
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	neturl "net/url"
	"regexp"
	"strings"

	"chainguard.dev/apko/pkg/apk/auth"
	"chainguard.dev/apko/pkg/build/types"
	"chainguard.dev/apko/pkg/ecosystem"

	"github.com/chainguard-dev/clog"
)

// defaultIndex is the Simple API index used when no index is configured.
const defaultIndex = "https://pypi.org/simple/"

// pypiJSONBaseDefault is the base URL of the PyPI JSON API.
const pypiJSONBaseDefault = "https://pypi.org/pypi/"

// pypiJSONBaseOverride allows tests to redirect the JSON API to a mock server.
+var pypiJSONBaseOverride string + +func pypiJSONBase() string { + if pypiJSONBaseOverride != "" { + return pypiJSONBaseOverride + } + return pypiJSONBaseDefault +} + +// packageSpec represents a parsed package requirement (e.g., "flask==3.0.0"). +type packageSpec struct { + Name string + Operator string // "==", ">=", "<=", "!=", "~=", "" + Version string + Extras []string + Markers string +} + +// parsePackageSpec parses a PEP 508-style requirement string. +func parsePackageSpec(spec string) packageSpec { + ps := packageSpec{} + + // Strip environment markers + if idx := strings.Index(spec, ";"); idx != -1 { + ps.Markers = strings.TrimSpace(spec[idx+1:]) + spec = strings.TrimSpace(spec[:idx]) + } + + // Strip extras + if lbIdx := strings.Index(spec, "["); lbIdx != -1 { + if rbIdx := strings.Index(spec, "]"); rbIdx != -1 { + extras := spec[lbIdx+1 : rbIdx] + ps.Extras = strings.Split(extras, ",") + for i := range ps.Extras { + ps.Extras[i] = strings.TrimSpace(ps.Extras[i]) + } + spec = spec[:lbIdx] + spec[rbIdx+1:] + } + } + + spec = strings.TrimSpace(spec) + + // Handle parenthesized version constraints: "package (>=1.0)" + if lpIdx := strings.Index(spec, "("); lpIdx != -1 { + if rpIdx := strings.LastIndex(spec, ")"); rpIdx > lpIdx { + ps.Name = strings.TrimSpace(spec[:lpIdx]) + inner := strings.TrimSpace(spec[lpIdx+1 : rpIdx]) + parts := strings.SplitN(inner, ",", 2) + constraint := strings.TrimSpace(parts[0]) + for _, op := range []string{"~=", "==", "!=", ">=", "<=", ">", "<"} { + if strings.HasPrefix(constraint, op) { + ps.Operator = op + ps.Version = strings.TrimSpace(constraint[len(op):]) + return ps + } + } + return ps + } + } + + // Find the first operator by position in the string + bestIdx := -1 + bestOp := "" + for _, op := range []string{"~=", "==", "!=", ">=", "<=", ">", "<"} { + idx := strings.Index(spec, op) + if idx != -1 && (bestIdx == -1 || idx < bestIdx) { + bestIdx = idx + bestOp = op + } + } + if bestIdx != -1 { + ps.Name = 
strings.TrimSpace(spec[:bestIdx]) + ps.Operator = bestOp + version := strings.TrimSpace(spec[bestIdx+len(bestOp):]) + if commaIdx := strings.Index(version, ","); commaIdx != -1 { + version = version[:commaIdx] + } + ps.Version = version + return ps + } + + ps.Name = spec + return ps +} + +// normalizeName normalizes a Python package name per PEP 503. +func normalizeName(name string) string { + return strings.ToLower(regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")) +} + +// --- PyPI JSON API types --- + +// pypiPackageJSON is the response from https://pypi.org/pypi/{name}/{version}/json +type pypiPackageJSON struct { + Info pypiInfo `json:"info"` + URLs []pypiURL `json:"urls"` +} + +type pypiInfo struct { + Name string `json:"name"` + Version string `json:"version"` + RequiresDist []string `json:"requires_dist"` +} + +type pypiURL struct { + Filename string `json:"filename"` + URL string `json:"url"` + PackageType string `json:"packagetype"` + Digests pypiDigests `json:"digests"` +} + +type pypiDigests struct { + SHA256 string `json:"sha256"` +} + +// pypiVersionsJSON is a minimal parse of https://pypi.org/pypi/{name}/json +// to list available versions. +type pypiVersionsJSON struct { + Releases map[string][]pypiURL `json:"releases"` +} + +// --- Resolution --- + +// resolvePackages resolves package specs to specific wheel URLs, +// including transitive dependencies discovered via the PyPI JSON API. 
+func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) ([]ecosystem.ResolvedPackage, error) { + log := clog.FromContext(ctx) + + if len(indexes) == 0 { + indexes = []string{defaultIndex} + } + + var resolved []ecosystem.ResolvedPackage + seen := map[string]bool{} + + // BFS queue + queue := make([]packageSpec, len(specs)) + copy(queue, specs) + + for len(queue) > 0 { + spec := queue[0] + queue = queue[1:] + + name := normalizeName(spec.Name) + if seen[name] { + continue + } + + pkg, deps, err := resolveOneWithDeps(ctx, spec, indexes, pythonVersion, arch, libc, a) + if err != nil { + return nil, fmt.Errorf("resolving %s: %w", spec.Name, err) + } + seen[name] = true + resolved = append(resolved, pkg) + log.Debugf("resolved %s==%s from %s", pkg.Name, pkg.Version, pkg.URL) + + for _, dep := range deps { + if !seen[normalizeName(dep.Name)] { + log.Debugf("discovered transitive dependency: %s (from %s)", dep.Name, pkg.Name) + queue = append(queue, dep) + } + } + } + + return resolved, nil +} + +// resolveOneWithDeps resolves a package and returns both the resolved package +// and its transitive dependencies. It tries the PyPI JSON API first (which +// gives us clean metadata), falling back to the Simple API for non-PyPI indexes. 
+func resolveOneWithDeps(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { + // Try PyPI JSON API first — it gives us metadata + wheel URLs in one call + if usesDefaultPyPI(indexes) { + pkg, deps, err := resolveViaJSON(ctx, spec, pythonVersion, arch, libc, a) + if err == nil { + return pkg, deps, nil + } + clog.FromContext(ctx).Debugf("JSON API failed for %s, falling back to Simple API: %v", spec.Name, err) + } + + // Fall back to Simple API (downloads wheel to extract Requires-Dist for deps) + pkg, deps, err := resolveViaSimple(ctx, spec, indexes, pythonVersion, arch, libc, a) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + return pkg, deps, nil +} + +func usesDefaultPyPI(indexes []string) bool { + if pypiJSONBaseOverride != "" { + return true + } + for _, idx := range indexes { + if strings.Contains(idx, "pypi.org") { + return true + } + } + return false +} + +// resolveViaJSON resolves a package using the PyPI JSON API. +// Returns the resolved package and its parsed Requires-Dist as deps. 
+func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { + name := normalizeName(spec.Name) + + // If we have an exact version, fetch that directly + if spec.Operator == "==" { + return resolveJSONVersion(ctx, name, spec.Name, spec.Version, pythonVersion, arch, libc, a) + } + + // Otherwise, list all versions and pick the best + versionsURL := pypiJSONBase() + name + "/json" + data, err := httpGet(ctx, versionsURL, a) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + var versionsResp pypiVersionsJSON + if err := json.Unmarshal(data, &versionsResp); err != nil { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("parsing PyPI versions JSON: %w", err) + } + + // Find the best matching version + bestVersion := "" + for version := range versionsResp.Releases { + if !matchesVersionSpec(version, spec) { + continue + } + // Skip pre-releases unless explicitly requested + if isPreRelease(version) && spec.Operator != "==" { + continue + } + if bestVersion == "" || compareVersions(version, bestVersion) > 0 { + bestVersion = version + } + } + if bestVersion == "" { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("no matching version for %s%s%s", spec.Name, spec.Operator, spec.Version) + } + + return resolveJSONVersion(ctx, name, spec.Name, bestVersion, pythonVersion, arch, libc, a) +} + +// resolveJSONVersion fetches a specific version from the PyPI JSON API. 
+func resolveJSONVersion(ctx context.Context, normalizedName, originalName, version, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { + versionURL := pypiJSONBase() + normalizedName + "/" + version + "/json" + data, err := httpGet(ctx, versionURL, a) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + var pkgResp pypiPackageJSON + if err := json.Unmarshal(data, &pkgResp); err != nil { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("parsing PyPI JSON: %w", err) + } + + // Find the best wheel from the URLs + wheelURL, checksum, err := selectBestWheelFromJSON(pkgResp.URLs, pythonVersion, arch, libc) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + // Parse dependencies from requires_dist + deps := make([]packageSpec, 0, len(pkgResp.Info.RequiresDist)) + for _, req := range pkgResp.Info.RequiresDist { + dep := parsePackageSpec(req) + if dep.Markers != "" && !evaluateMarkers(dep.Markers, nil) { + continue + } + deps = append(deps, dep) + } + + return ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: originalName, + Version: pkgResp.Info.Version, + URL: wheelURL, + Checksum: checksum, + }, deps, nil +} + +// selectBestWheelFromJSON picks the best compatible wheel from PyPI JSON API URLs. 
+func selectBestWheelFromJSON(urls []pypiURL, pythonVersion string, arch types.Architecture, libc string) (string, string, error) { + var bestURL *pypiURL + var bestParts wheelFileParts + + for i, u := range urls { + if u.PackageType != "bdist_wheel" { + continue + } + parts, err := parseWheelFilename(u.Filename) + if err != nil { + continue + } + if !isCompatibleWheel(parts, pythonVersion, arch, libc) { + continue + } + + if bestURL == nil || isBetterWheel(bestParts, parts) { + bestURL = &urls[i] + bestParts = parts + } + } + + if bestURL == nil { + return "", "", fmt.Errorf("no compatible wheel found") + } + + checksum := "" + if bestURL.Digests.SHA256 != "" { + checksum = "sha256:" + bestURL.Digests.SHA256 + } + return bestURL.URL, checksum, nil +} + +// isPreRelease returns true if a version string looks like a pre-release. +func isPreRelease(version string) bool { + v := strings.ToLower(version) + for _, tag := range []string{"a", "b", "rc", "alpha", "beta", "dev", "pre"} { + if strings.Contains(v, tag) { + return true + } + } + return false +} + +// --- Simple API fallback (for non-PyPI indexes) --- + +// wheelLink represents a parsed link from a PEP 503 Simple API response. +type wheelLink struct { + Filename string + URL string + Checksum string // "sha256:" + RequiresPython string + SignatureURL string // optional: from data-signature attribute + ProvenanceURL string // optional: from data-provenance attribute +} + +// parseSimpleIndex parses the HTML from a PEP 503 Simple Repository API response. +func parseSimpleIndex(body string, baseURL string) []wheelLink { + // Use a regex that handles '>' inside quoted attribute values (e.g., data-requires-python=">=3.0"). + // The [^>]* approach breaks when attributes contain '>' characters. 
+ linkRe := regexp.MustCompile(`"]*(?:"[^"]*")?)*href="([^"]*)"(?:[^>"]*(?:"[^"]*")?)*>([^<]*)`) + requiresPythonRe := regexp.MustCompile(`data-requires-python="([^"]*)"`) + provenanceRe := regexp.MustCompile(`data-provenance="([^"]*)"`) + signatureRe := regexp.MustCompile(`data-signature="([^"]*)"`) + + matches := linkRe.FindAllStringSubmatch(body, -1) + links := make([]wheelLink, 0, len(matches)) + for _, match := range matches { + href := match[1] + filename := strings.TrimSpace(match[2]) + + if !strings.HasSuffix(filename, ".whl") { + continue + } + + var checksum string + if hashIdx := strings.Index(href, "#sha256="); hashIdx != -1 { + checksum = "sha256:" + href[hashIdx+8:] + href = href[:hashIdx] + } + + linkURL := href + if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") { + if base, err := neturl.Parse(baseURL); err == nil { + if ref, err := neturl.Parse(href); err == nil { + linkURL = base.ResolveReference(ref).String() + } + } + } + + var requiresPython, provenanceURL, signatureURL string + matchIdx := strings.Index(body, match[0]) + if matchIdx >= 0 { + // match[0] starts with "= 0 { + // Find the closing '>' of the tag, skipping '>' inside quoted attributes. 
+ tag := "" + rest := body[tagStart:] + inQuote := false + for j, c := range rest { + if c == '"' { + inQuote = !inQuote + } else if c == '>' && !inQuote { + tag = rest[:j+1] + break + } + } + if rpMatch := requiresPythonRe.FindStringSubmatch(tag); rpMatch != nil { + requiresPython = strings.ReplaceAll(rpMatch[1], ">", ">") + requiresPython = strings.ReplaceAll(requiresPython, "<", "<") + requiresPython = strings.ReplaceAll(requiresPython, "&", "&") + } + if pvMatch := provenanceRe.FindStringSubmatch(tag); pvMatch != nil { + provenanceURL = pvMatch[1] + } + if sigMatch := signatureRe.FindStringSubmatch(tag); sigMatch != nil { + signatureURL = sigMatch[1] + } + } + } + + links = append(links, wheelLink{ + Filename: filename, + URL: linkURL, + Checksum: checksum, + RequiresPython: requiresPython, + SignatureURL: signatureURL, + ProvenanceURL: provenanceURL, + }) + } + + return links +} + +// resolveViaSimple resolves a package using the PEP 503 Simple API. +// After finding the best wheel, it downloads it to extract Requires-Dist +// metadata for transitive dependency resolution. 
+func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { + name := normalizeName(spec.Name) + + for _, index := range indexes { + indexURL := strings.TrimSuffix(index, "/") + "/" + name + "/" + + body, err := fetchSimpleIndex(ctx, indexURL, a) + if err != nil { + clog.FromContext(ctx).Debugf("index %s: %v", indexURL, err) + continue + } + + links := parseSimpleIndex(body, indexURL) + if len(links) == 0 { + continue + } + + best, err := selectBestWheel(links, spec, pythonVersion, arch, libc) + if err != nil { + continue + } + + pkg := ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: spec.Name, + Version: best.version, + URL: best.url, + Checksum: best.checksum, + SignatureURL: best.signatureURL, + ProvenanceURL: best.provenanceURL, + } + + // Download wheel to extract Requires-Dist for transitive deps. + deps, err := extractDepsFromWheel(ctx, best.url, a) + if err != nil { + clog.FromContext(ctx).Debugf("could not extract deps from wheel for %s: %v", spec.Name, err) + } + + return pkg, deps, nil + } + + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("package %s not found in any index", spec.Name) +} + +// extractDepsFromWheel downloads a wheel and parses its METADATA for Requires-Dist. 
+func extractDepsFromWheel(ctx context.Context, url string, a auth.Authenticator) ([]packageSpec, error) { + data, err := httpGet(ctx, url, a) + if err != nil { + return nil, fmt.Errorf("downloading wheel: %w", err) + } + + reader, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return nil, fmt.Errorf("opening wheel as zip: %w", err) + } + + for _, f := range reader.File { + if !strings.HasSuffix(f.Name, ".dist-info/METADATA") { + continue + } + rc, err := f.Open() + if err != nil { + return nil, fmt.Errorf("opening METADATA: %w", err) + } + metadataBytes, err := io.ReadAll(rc) + rc.Close() + if err != nil { + return nil, fmt.Errorf("reading METADATA: %w", err) + } + return parseRequiresDist(string(metadataBytes)), nil + } + + return nil, nil +} + +// parseRequiresDist extracts Requires-Dist entries from wheel METADATA content. +func parseRequiresDist(metadata string) []packageSpec { + deps := make([]packageSpec, 0, strings.Count(metadata, "Requires-Dist: ")) + for line := range strings.SplitSeq(metadata, "\n") { + line = strings.TrimRight(line, "\r") + if !strings.HasPrefix(line, "Requires-Dist: ") { + continue + } + req := strings.TrimPrefix(line, "Requires-Dist: ") + dep := parsePackageSpec(req) + if dep.Markers != "" && !evaluateMarkers(dep.Markers, nil) { + continue + } + deps = append(deps, dep) + } + return deps +} + +type selectedWheel struct { + version string + url string + checksum string + signatureURL string + provenanceURL string +} + +// selectBestWheel selects the best compatible wheel from Simple API links. 
+func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, arch types.Architecture, libc string) (selectedWheel, error) { + var bestLink *wheelLink + var bestParts wheelFileParts + + for i, link := range links { + parts, err := parseWheelFilename(link.Filename) + if err != nil { + continue + } + if !isCompatibleWheel(parts, pythonVersion, arch, libc) { + continue + } + if !matchesVersionSpec(parts.Version, spec) { + continue + } + + if bestLink == nil || compareVersions(parts.Version, bestParts.Version) > 0 || (compareVersions(parts.Version, bestParts.Version) == 0 && isBetterWheel(bestParts, parts)) { + bestLink = &links[i] + bestParts = parts + } + } + + if bestLink == nil { + return selectedWheel{}, fmt.Errorf("no compatible wheel found") + } + + return selectedWheel{ + version: bestParts.Version, + url: bestLink.URL, + checksum: bestLink.Checksum, + signatureURL: bestLink.SignatureURL, + provenanceURL: bestLink.ProvenanceURL, + }, nil +} + +// --- Version comparison --- + +func matchesVersionSpec(version string, spec packageSpec) bool { + if spec.Operator == "" { + return true + } + switch spec.Operator { + case "==": + return version == spec.Version + case "!=": + return version != spec.Version + case ">=": + return compareVersions(version, spec.Version) >= 0 + case "<=": + return compareVersions(version, spec.Version) <= 0 + case ">": + return compareVersions(version, spec.Version) > 0 + case "<": + return compareVersions(version, spec.Version) < 0 + case "~=": + if compareVersions(version, spec.Version) < 0 { + return false + } + specParts := strings.Split(spec.Version, ".") + verParts := strings.Split(version, ".") + if len(specParts) < 2 || len(verParts) < 2 { + return false + } + for i := 0; i < len(specParts)-1 && i < len(verParts); i++ { + if verParts[i] != specParts[i] { + return false + } + } + return true + } + return false +} + +func compareVersions(a, b string) int { + aParts := strings.Split(a, ".") + bParts := 
strings.Split(b, ".") + + maxLen := len(aParts) + maxLen = max(maxLen, len(bParts)) + + for i := 0; i < maxLen; i++ { + var aVal, bVal string + if i < len(aParts) { + aVal = aParts[i] + } else { + aVal = "0" + } + if i < len(bParts) { + bVal = bParts[i] + } else { + bVal = "0" + } + if aVal == bVal { + continue + } + aNum := parseVersionPart(aVal) + bNum := parseVersionPart(bVal) + if aNum != bNum { + if aNum < bNum { + return -1 + } + return 1 + } + if aVal < bVal { + return -1 + } + return 1 + } + return 0 +} + +func parseVersionPart(s string) int { + n := 0 + for _, c := range s { + if c >= '0' && c <= '9' { + n = n*10 + int(c-'0') + } else { + break + } + } + return n +} + +// --- HTTP helpers --- + +func fetchSimpleIndex(ctx context.Context, url string, a auth.Authenticator) (string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return "", err + } + req.Header.Set("Accept", "text/html") + + if a != nil { + if err := a.AddAuth(ctx, req); err != nil { + return "", fmt.Errorf("adding auth for %s: %w", url, err) + } + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("HTTP %d for %s", resp.StatusCode, url) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return "", err + } + return string(body), nil +} + +func httpGet(ctx context.Context, url string, a auth.Authenticator) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + if a != nil { + if err := a.AddAuth(ctx, req); err != nil { + return nil, fmt.Errorf("adding auth for %s: %w", url, err) + } + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d for %s", resp.StatusCode, url) + } + + return 
io.ReadAll(resp.Body) +} diff --git a/pkg/ecosystem/python/resolve_test.go b/pkg/ecosystem/python/resolve_test.go new file mode 100644 index 000000000..3aa13cec8 --- /dev/null +++ b/pkg/ecosystem/python/resolve_test.go @@ -0,0 +1,471 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + + "chainguard.dev/apko/pkg/build/types" +) + +func TestParsePackageSpec(t *testing.T) { + tests := []struct { + input string + name string + op string + version string + markers string + }{ + {"flask==3.0.0", "flask", "==", "3.0.0", ""}, + {"requests>=2.31.0", "requests", ">=", "2.31.0", ""}, + {"numpy", "numpy", "", "", ""}, + {"foo~=1.4.2", "foo", "~=", "1.4.2", ""}, + {"bar!=2.0", "bar", "!=", "2.0", ""}, + {`baz>=1.0; python_version>="3.8"`, "baz", ">=", "1.0", `python_version>="3.8"`}, + {"typing-extensions (>=4.10.0)", "typing-extensions", ">=", "4.10.0", ""}, + {"packaging (>=22.0,<25.0)", "packaging", ">=", "22.0", ""}, + {"mpmath<1.4,>=1.1.0", "mpmath", "<", "1.4", ""}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + spec := parsePackageSpec(tt.input) + if spec.Name != tt.name { + t.Errorf("Name = %q, want %q", spec.Name, tt.name) + } + if spec.Operator != tt.op { + t.Errorf("Operator = %q, want %q", spec.Operator, tt.op) + } + if spec.Version != tt.version { + t.Errorf("Version = %q, 
want %q", spec.Version, tt.version) + } + if spec.Markers != tt.markers { + t.Errorf("Markers = %q, want %q", spec.Markers, tt.markers) + } + }) + } +} + +func TestNormalizeName(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"Flask", "flask"}, + {"my-package", "my-package"}, + {"my_package", "my-package"}, + {"My.Package", "my-package"}, + {"My---Package", "my-package"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := normalizeName(tt.input) + if got != tt.want { + t.Errorf("normalizeName(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestParseSimpleIndex(t *testing.T) { + body := ` + +Flask-3.0.0-py3-none-any.whl +Flask-2.3.0-py3-none-any.whl +Flask-3.0.0.tar.gz + +` + links := parseSimpleIndex(body, "https://pypi.org/simple/flask/") + if len(links) != 2 { + t.Fatalf("expected 2 wheel links, got %d", len(links)) + } + + if links[0].Filename != "Flask-3.0.0-py3-none-any.whl" { + t.Errorf("links[0].Filename = %q", links[0].Filename) + } + if links[0].Checksum != "sha256:abc123" { + t.Errorf("links[0].Checksum = %q", links[0].Checksum) + } +} + +func TestParseSimpleIndexProvenance(t *testing.T) { + body := ` + +foo-1.0.0-py3-none-any.whl +bar-2.0.0-py3-none-any.whl + +` + links := parseSimpleIndex(body, "https://cgr.dev/simple/") + if len(links) != 2 { + t.Fatalf("expected 2 wheel links, got %d", len(links)) + } + + // First link should have provenance and signature + if links[0].ProvenanceURL != "https://cgr.dev/prov/foo" { + t.Errorf("links[0].ProvenanceURL = %q, want %q", links[0].ProvenanceURL, "https://cgr.dev/prov/foo") + } + if links[0].SignatureURL != "https://cgr.dev/sig/foo" { + t.Errorf("links[0].SignatureURL = %q, want %q", links[0].SignatureURL, "https://cgr.dev/sig/foo") + } + if links[0].RequiresPython != ">=3.8" { + t.Errorf("links[0].RequiresPython = %q, want %q", links[0].RequiresPython, ">=3.8") + } + + // Second link should have empty provenance/signature + if 
links[1].ProvenanceURL != "" { + t.Errorf("links[1].ProvenanceURL = %q, want empty", links[1].ProvenanceURL) + } + if links[1].SignatureURL != "" { + t.Errorf("links[1].SignatureURL = %q, want empty", links[1].SignatureURL) + } +} + +func TestParseRequiresDist(t *testing.T) { + metadata := `Metadata-Version: 2.1 +Name: vunnel +Version: 0.55.3 +Requires-Dist: click>=8.0 +Requires-Dist: PyYAML>=6.0 +Requires-Dist: colorlog>=6.0 +Requires-Dist: pytest; extra == "dev" +Requires-Dist: importlib-metadata>=4.0; python_version < "3.8" +` + deps := parseRequiresDist(metadata) + + // Should get click, PyYAML, colorlog (not pytest which needs extra, not importlib-metadata gated on old python) + names := map[string]bool{} + for _, d := range deps { + names[normalizeName(d.Name)] = true + } + if !names["click"] { + t.Error("missing click") + } + if !names["pyyaml"] { + t.Error("missing pyyaml") + } + if !names["colorlog"] { + t.Error("missing colorlog") + } + if names["pytest"] { + t.Error("should not include pytest (extra-gated)") + } + // importlib-metadata is python_version gated — evaluateMarkers is permissive for python_version + // so it WILL be included (which is correct — we filter by wheel compatibility later) +} + +func TestCompareVersions(t *testing.T) { + tests := []struct { + a, b string + want int + }{ + {"1.0.0", "1.0.0", 0}, + {"2.0.0", "1.0.0", 1}, + {"1.0.0", "2.0.0", -1}, + {"1.10.0", "1.9.0", 1}, + {"1.0", "1.0.0", 0}, + } + + for _, tt := range tests { + t.Run(tt.a+"_vs_"+tt.b, func(t *testing.T) { + got := compareVersions(tt.a, tt.b) + if got != tt.want { + t.Errorf("compareVersions(%q, %q) = %d, want %d", tt.a, tt.b, got, tt.want) + } + }) + } +} + +func TestMatchesVersionSpec(t *testing.T) { + tests := []struct { + version string + spec packageSpec + want bool + }{ + {"3.0.0", packageSpec{Operator: "==", Version: "3.0.0"}, true}, + {"3.0.1", packageSpec{Operator: "==", Version: "3.0.0"}, false}, + {"3.0.0", packageSpec{Operator: ">=", Version: "2.0.0"}, 
true}, + {"1.0.0", packageSpec{Operator: ">=", Version: "2.0.0"}, false}, + {"3.0.0", packageSpec{Operator: "", Version: ""}, true}, + {"1.4.3", packageSpec{Operator: "~=", Version: "1.4.2"}, true}, + {"2.0.0", packageSpec{Operator: "~=", Version: "1.4.2"}, false}, + } + + for _, tt := range tests { + t.Run(tt.version+"_"+tt.spec.Operator+tt.spec.Version, func(t *testing.T) { + got := matchesVersionSpec(tt.version, tt.spec) + if got != tt.want { + t.Errorf("matchesVersionSpec(%q, %v) = %v, want %v", tt.version, tt.spec, got, tt.want) + } + }) + } +} + +func TestIsPreRelease(t *testing.T) { + tests := []struct { + version string + want bool + }{ + {"3.0.0", false}, + {"3.0.0rc1", true}, + {"3.0.0a1", true}, + {"3.0.0b2", true}, + {"3.0.0.dev1", true}, + {"1.14.0rc2", true}, + } + for _, tt := range tests { + t.Run(tt.version, func(t *testing.T) { + got := isPreRelease(tt.version) + if got != tt.want { + t.Errorf("isPreRelease(%q) = %v, want %v", tt.version, got, tt.want) + } + }) + } +} + +// servePyPIJSON creates a mock server that serves PyPI JSON API responses. +// Wheel URLs in test data use a placeholder that gets replaced with the +// actual test server URL to avoid outbound network calls. +func servePyPIJSON(t *testing.T, packages map[string]pypiPackageJSON) *httptest.Server { + t.Helper() + mux := http.NewServeMux() + + // Placeholder replaced with actual server URL after startup. + var serverURL string + + for name, pkg := range packages { + name := normalizeName(name) + pkg := pkg + + // Serve /pypi/{name}/{version}/json + mux.HandleFunc("/pypi/"+name+"/"+pkg.Info.Version+"/json", func(w http.ResponseWriter, r *http.Request) { + // Rewrite placeholder URLs to point to this test server. 
+ resp := pkg + for i := range resp.URLs { + resp.URLs[i].URL = serverURL + "/wheels/" + resp.URLs[i].Filename + } + json.NewEncoder(w).Encode(resp) + }) + + // Serve /pypi/{name}/json (versions listing) + mux.HandleFunc("/pypi/"+name+"/json", func(w http.ResponseWriter, r *http.Request) { + urls := make([]pypiURL, len(pkg.URLs)) + copy(urls, pkg.URLs) + for i := range urls { + urls[i].URL = serverURL + "/wheels/" + urls[i].Filename + } + resp := pypiVersionsJSON{ + Releases: map[string][]pypiURL{ + pkg.Info.Version: urls, + }, + } + json.NewEncoder(w).Encode(resp) + }) + + // Serve Simple API as fallback + mux.HandleFunc("/simple/"+name+"/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + var b strings.Builder + b.WriteString("\n") + for _, u := range pkg.URLs { + b.WriteString(`` + u.Filename + "\n") + } + b.WriteString("") + w.Write([]byte(b.String())) + }) + } + + // Serve dummy wheel downloads (resolver fetches these to extract deps). + mux.HandleFunc("/wheels/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + + server := httptest.NewServer(mux) + serverURL = server.URL + return server +} + +func TestResolveWithMockJSON(t *testing.T) { + server := servePyPIJSON(t, map[string]pypiPackageJSON{ + "flask": { + Info: pypiInfo{ + Name: "Flask", + Version: "3.0.0", + }, + URLs: []pypiURL{{ + Filename: "Flask-3.0.0-py3-none-any.whl", + URL: "https://placeholder/Flask-3.0.0-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "abc123"}, + }}, + }, + }) + defer server.Close() + + // Override the JSON API base for the test + origBase := pypiJSONBase + defer func() { pypiJSONBaseOverride = ""; _ = origBase }() + pypiJSONBaseOverride = server.URL + "/pypi/" + + specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", 
types.ParseArchitecture("amd64"), "glibc", nil) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + if len(resolved) != 1 { + t.Fatalf("expected 1 resolved package, got %d", len(resolved)) + } + if resolved[0].Name != "flask" { + t.Errorf("Name = %q, want %q", resolved[0].Name, "flask") + } + if resolved[0].Version != "3.0.0" { + t.Errorf("Version = %q, want %q", resolved[0].Version, "3.0.0") + } + if resolved[0].Checksum != "sha256:abc123" { + t.Errorf("Checksum = %q, want %q", resolved[0].Checksum, "sha256:abc123") + } +} + +func TestResolveTransitiveDeps(t *testing.T) { + server := servePyPIJSON(t, map[string]pypiPackageJSON{ + "flask": { + Info: pypiInfo{ + Name: "Flask", + Version: "3.0.0", + RequiresDist: []string{ + "Werkzeug>=3.0.0", + "click>=8.0", + "devtools; extra == \"dev\"", + }, + }, + URLs: []pypiURL{{ + Filename: "Flask-3.0.0-py3-none-any.whl", + URL: "https://placeholder/Flask-3.0.0-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "aaa"}, + }}, + }, + "werkzeug": { + Info: pypiInfo{ + Name: "Werkzeug", + Version: "3.0.1", + RequiresDist: []string{ + "MarkupSafe>=2.1.1", + }, + }, + URLs: []pypiURL{{ + Filename: "Werkzeug-3.0.1-py3-none-any.whl", + URL: "https://placeholder/Werkzeug-3.0.1-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "bbb"}, + }}, + }, + "click": { + Info: pypiInfo{ + Name: "click", + Version: "8.1.7", + }, + URLs: []pypiURL{{ + Filename: "click-8.1.7-py3-none-any.whl", + URL: "https://placeholder/click-8.1.7-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "ccc"}, + }}, + }, + "markupsafe": { + Info: pypiInfo{ + Name: "MarkupSafe", + Version: "2.1.5", + }, + URLs: []pypiURL{{ + Filename: "MarkupSafe-2.1.5-py3-none-any.whl", + URL: "https://placeholder/MarkupSafe-2.1.5-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "ddd"}, + }}, + }, + }) + defer server.Close() + + 
pypiJSONBaseOverride = server.URL + "/pypi/" + defer func() { pypiJSONBaseOverride = "" }() + + specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), "glibc", nil) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + names := map[string]bool{} + for _, pkg := range resolved { + names[normalizeName(pkg.Name)] = true + } + + for _, want := range []string{"flask", "werkzeug", "click", "markupsafe"} { + if !names[want] { + t.Errorf("missing transitive dependency: %s (resolved: %v)", want, names) + } + } + if names["devtools"] { + t.Error("should NOT include devtools (gated on extra)") + } + if len(resolved) != 4 { + t.Errorf("expected 4 resolved packages, got %d: %v", len(resolved), names) + } +} + +func TestResolveSimpleApiFallback(t *testing.T) { + // Test that non-PyPI indexes use the Simple API. + // All URLs must point to the test server to avoid outbound network calls. + mux := http.NewServeMux() + + // Serve the simple index page; the wheel URL is set dynamically after the server starts. + var serverURL string + mux.HandleFunc("/simple/mypackage/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(` +mypackage-1.0.0-py3-none-any.whl +`)) + }) + + // Serve a dummy wheel (the resolver downloads it to extract deps). 
+ mux.HandleFunc("/wheels/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + + server := httptest.NewServer(mux) + defer server.Close() + serverURL = server.URL + + specs := []packageSpec{{Name: "mypackage", Operator: "==", Version: "1.0.0"}} + // Use a non-pypi index so it doesn't try the JSON API + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), "glibc", nil) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + if len(resolved) != 1 { + t.Fatalf("expected 1 resolved package, got %d", len(resolved)) + } + if resolved[0].Version != "1.0.0" { + t.Errorf("Version = %q, want %q", resolved[0].Version, "1.0.0") + } +} diff --git a/pkg/ecosystem/python/sbom.go b/pkg/ecosystem/python/sbom.go new file mode 100644 index 000000000..d00ec1c0b --- /dev/null +++ b/pkg/ecosystem/python/sbom.go @@ -0,0 +1,119 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "archive/zip" + "bytes" + "encoding/json" + "fmt" + "path/filepath" + "strings" + "time" + + apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/ecosystem" +) + +// isChainguardSource returns true if the URL points to a Chainguard Libraries index. 
+func isChainguardSource(url string) bool { + return strings.Contains(url, "cgr.dev") +} + +// writePackageSBOM writes a minimal SPDX 2.3 SBOM into the dist-info/sboms/ directory. +// This enables `chainctl libraries verify` to confirm Chainguard provenance. +func writePackageSBOM(fsys apkfs.FullFS, sitePackagesPath string, wheelData []byte, pkg ecosystem.ResolvedPackage) error { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return err + } + + // Find the .dist-info directory name from the wheel contents. + var distInfoDir string + for _, f := range reader.File { + if strings.HasSuffix(f.Name, ".dist-info/METADATA") { + distInfoDir = filepath.Dir(f.Name) + break + } + } + if distInfoDir == "" { + return fmt.Errorf("no .dist-info/METADATA found in wheel") + } + + sbomData, err := generatePackageSBOM(pkg) + if err != nil { + return fmt.Errorf("generating SBOM: %w", err) + } + + sbomDir := filepath.Join(sitePackagesPath, distInfoDir, "sboms") + if err := fsys.MkdirAll(sbomDir, 0755); err != nil { + return fmt.Errorf("creating sboms directory: %w", err) + } + + sbomPath := filepath.Join(sbomDir, "sbom.spdx.json") + return fsys.WriteFile(sbomPath, sbomData, 0644) +} + +// spdxDocument is a minimal SPDX 2.3 JSON document structure. 
+type spdxDocument struct { + SPDXVersion string `json:"spdxVersion"` + DataLicense string `json:"dataLicense"` + SPDXID string `json:"SPDXID"` + Name string `json:"name"` + Namespace string `json:"documentNamespace"` + CreationInfo spdxCreationInfo `json:"creationInfo"` + Packages []spdxPackage `json:"packages"` +} + +type spdxCreationInfo struct { + Created string `json:"created"` + Creators []string `json:"creators"` +} + +type spdxPackage struct { + SPDXID string `json:"SPDXID"` + Name string `json:"name"` + Version string `json:"versionInfo"` + Supplier string `json:"supplier"` + Originator string `json:"originator"` + DownloadLocation string `json:"downloadLocation"` + FilesAnalyzed bool `json:"filesAnalyzed"` +} + +// generatePackageSBOM generates a minimal SPDX 2.3 JSON SBOM for a Chainguard-sourced package. +func generatePackageSBOM(pkg ecosystem.ResolvedPackage) ([]byte, error) { + doc := spdxDocument{ + SPDXVersion: "SPDX-2.3", + DataLicense: "CC0-1.0", + SPDXID: "SPDXRef-DOCUMENT", + Name: pkg.Name + "-" + pkg.Version, + Namespace: "https://chainguard.dev/spdx/" + pkg.Name + "-" + pkg.Version, + CreationInfo: spdxCreationInfo{ + Created: time.Now().UTC().Format(time.RFC3339), + Creators: []string{"Tool: apko", "Organization: Chainguard, Inc."}, + }, + Packages: []spdxPackage{{ + SPDXID: "SPDXRef-Package", + Name: pkg.Name, + Version: pkg.Version, + Supplier: "Organization: Chainguard, Inc.", + Originator: "Organization: Chainguard, Inc.", + DownloadLocation: pkg.URL, + FilesAnalyzed: false, + }}, + } + + return json.MarshalIndent(doc, "", " ") +} diff --git a/pkg/ecosystem/python/sbom_test.go b/pkg/ecosystem/python/sbom_test.go new file mode 100644 index 000000000..18096acd9 --- /dev/null +++ b/pkg/ecosystem/python/sbom_test.go @@ -0,0 +1,100 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "encoding/json" + "strings" + "testing" + + "chainguard.dev/apko/pkg/ecosystem" +) + +func TestIsChainguardSource(t *testing.T) { + tests := []struct { + url string + want bool + }{ + {"https://cgr.dev/chainguard-dev/libraries/python/simple/flask/Flask-3.0.0-py3-none-any.whl", true}, + {"https://packages.cgr.dev/os/x86_64/some-package.whl", true}, + {"https://pypi.org/simple/flask/Flask-3.0.0-py3-none-any.whl", false}, + {"https://files.pythonhosted.org/packages/Flask-3.0.0-py3-none-any.whl", false}, + } + + for _, tt := range tests { + t.Run(tt.url, func(t *testing.T) { + got := isChainguardSource(tt.url) + if got != tt.want { + t.Errorf("isChainguardSource(%q) = %v, want %v", tt.url, got, tt.want) + } + }) + } +} + +func TestGeneratePackageSBOM(t *testing.T) { + pkg := ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: "flask", + Version: "3.0.0", + URL: "https://cgr.dev/chainguard-dev/libraries/python/simple/flask/Flask-3.0.0-py3-none-any.whl", + Checksum: "sha256:abc123", + } + + data, err := generatePackageSBOM(pkg) + if err != nil { + t.Fatalf("generatePackageSBOM() error: %v", err) + } + + var doc spdxDocument + if err := json.Unmarshal(data, &doc); err != nil { + t.Fatalf("unmarshaling SBOM: %v", err) + } + + if doc.SPDXVersion != "SPDX-2.3" { + t.Errorf("SPDXVersion = %q, want %q", doc.SPDXVersion, "SPDX-2.3") + } + + // Verify creators include Chainguard — this is what chainctl libraries verify checks. 
+ foundChainguard := false + for _, c := range doc.CreationInfo.Creators { + if strings.Contains(strings.ToLower(c), "chainguard") { + foundChainguard = true + } + } + if !foundChainguard { + t.Errorf("creationInfo.creators %v does not contain Chainguard", doc.CreationInfo.Creators) + } + + if len(doc.Packages) != 1 { + t.Fatalf("expected 1 package, got %d", len(doc.Packages)) + } + + p := doc.Packages[0] + if p.Name != "flask" { + t.Errorf("package name = %q, want %q", p.Name, "flask") + } + if p.Version != "3.0.0" { + t.Errorf("package version = %q, want %q", p.Version, "3.0.0") + } + if !strings.Contains(strings.ToLower(p.Supplier), "chainguard") { + t.Errorf("supplier = %q, does not contain chainguard", p.Supplier) + } + if !strings.Contains(strings.ToLower(p.Originator), "chainguard") { + t.Errorf("originator = %q, does not contain chainguard", p.Originator) + } + if p.DownloadLocation != pkg.URL { + t.Errorf("downloadLocation = %q, want %q", p.DownloadLocation, pkg.URL) + } +} diff --git a/pkg/ecosystem/python/wheel.go b/pkg/ecosystem/python/wheel.go new file mode 100644 index 000000000..50c6ff2a0 --- /dev/null +++ b/pkg/ecosystem/python/wheel.go @@ -0,0 +1,309 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package python + +import ( + "archive/zip" + "bytes" + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "io" + "path/filepath" + "strings" + + apkfs "chainguard.dev/apko/pkg/apk/fs" +) + +// extractWheel extracts a wheel (.whl) file into the filesystem at the given +// site-packages path. A .whl file is a ZIP archive. +func extractWheel(fsys apkfs.FullFS, wheelData []byte, sitePackagesPath string) error { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return fmt.Errorf("opening wheel as zip: %w", err) + } + + cleanBase := filepath.Clean(sitePackagesPath) + string(filepath.Separator) + for _, f := range reader.File { + // G305: Protect against zip slip / path traversal. + targetPath := filepath.Join(sitePackagesPath, filepath.Clean(f.Name)) + if !strings.HasPrefix(targetPath, cleanBase) { + return errors.New("illegal file path in wheel archive: " + f.Name) + } + + if f.FileInfo().IsDir() { + if err := fsys.MkdirAll(targetPath, 0755); err != nil { + return fmt.Errorf("creating directory %s: %w", targetPath, err) + } + continue + } + + // Ensure parent directory exists + dir := filepath.Dir(targetPath) + if err := fsys.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("creating parent directory %s: %w", dir, err) + } + + rc, err := f.Open() + if err != nil { + return fmt.Errorf("opening %s in wheel: %w", f.Name, err) + } + + data, err := io.ReadAll(rc) + rc.Close() + if err != nil { + return fmt.Errorf("reading %s from wheel: %w", f.Name, err) + } + + if err := fsys.WriteFile(targetPath, data, 0644); err != nil { + return fmt.Errorf("writing %s: %w", targetPath, err) + } + } + + return nil +} + +// writeInstallerFile writes the PEP 376 INSTALLER file into the .dist-info directory. 
+func writeInstallerFile(fsys apkfs.FullFS, sitePackagesPath string, wheelData []byte) error { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return err + } + + // Find the .dist-info directory + for _, f := range reader.File { + if strings.HasSuffix(f.Name, ".dist-info/METADATA") { + distInfoDir := filepath.Dir(f.Name) + installerPath := filepath.Join(sitePackagesPath, distInfoDir, "INSTALLER") + return fsys.WriteFile(installerPath, []byte("apko\n"), 0644) + } + } + + return nil +} + +// evaluateMarkers performs a simplified evaluation of PEP 508 environment markers. +// It handles the most common cases: +// - extra == "..." — only satisfied if the extra was requested +// - os_name, sys_platform, platform_system — always Linux +// - python_version — assumed satisfied (we already filtered wheels) +// - implementation_name — "cpython" +// +// For compound markers (and/or), we do best-effort evaluation. +func evaluateMarkers(markers string, requestedExtras []string) bool { + markers = strings.TrimSpace(markers) + + // Handle "or" — if any branch is true, the whole thing is true + if orParts := splitMarkerOr(markers); len(orParts) > 1 { + for _, part := range orParts { + if evaluateMarkers(part, requestedExtras) { + return true + } + } + return false + } + + // Handle "and" — all branches must be true + if andParts := splitMarkerAnd(markers); len(andParts) > 1 { + for _, part := range andParts { + if !evaluateMarkers(part, requestedExtras) { + return false + } + } + return true + } + + // Strip outer parens + markers = strings.TrimSpace(markers) + for strings.HasPrefix(markers, "(") && strings.HasSuffix(markers, ")") { + markers = strings.TrimSpace(markers[1 : len(markers)-1]) + } + + // Parse single comparison: key op value + key, op, value := parseMarkerExpr(markers) + if key == "" { + // Can't parse — be permissive, include the dep + return true + } + + switch key { + case "extra": + // Only include if the 
extra was explicitly requested + for _, e := range requestedExtras { + if matchMarkerOp(e, op, value) { + return true + } + } + return false + case "os_name": + return matchMarkerOp("posix", op, value) + case "sys_platform": + return matchMarkerOp("linux", op, value) + case "platform_system": + return matchMarkerOp("Linux", op, value) + case "implementation_name": + return matchMarkerOp("cpython", op, value) + case "python_version", "python_full_version", "platform_machine", + "platform_release", "platform_version", "implementation_version": + // Be permissive for version-related markers — we've already + // filtered wheels by Python version compatibility. + return true + default: + // Unknown marker — be permissive + return true + } +} + +// splitMarkerOr splits on " or " at the top level (not inside parens). +func splitMarkerOr(s string) []string { + return splitMarkerBool(s, " or ") +} + +// splitMarkerAnd splits on " and " at the top level (not inside parens). +func splitMarkerAnd(s string) []string { + return splitMarkerBool(s, " and ") +} + +func splitMarkerBool(s, sep string) []string { + var parts []string + depth := 0 + start := 0 + for i := 0; i < len(s); i++ { + switch s[i] { + case '(': + depth++ + case ')': + depth-- + default: + if depth == 0 && i+len(sep) <= len(s) && s[i:i+len(sep)] == sep { + parts = append(parts, strings.TrimSpace(s[start:i])) + start = i + len(sep) + i += len(sep) - 1 + } + } + } + parts = append(parts, strings.TrimSpace(s[start:])) + if len(parts) == 1 && parts[0] == s { + return parts + } + return parts +} + +// parseMarkerExpr parses "key op 'value'" or "'value' op key". 
//
// It returns the marker variable, the operator and the literal operand with
// surrounding quotes removed. When the variable is on the right-hand side the
// operands are swapped and the operator is mirrored with flipOp so that the
// result always reads "variable op literal". If neither side is a known
// marker variable the left-hand side is returned as the key. All three
// results are empty when no operator is found.
func parseMarkerExpr(expr string) (key, op, value string) {
	expr = strings.TrimSpace(expr)

	// Longer operators come before their prefixes/substrings ("===" before
	// "==", ">=" before ">", " not in " before " in ") so that the shorter
	// one never matches in the middle of the longer one.
	for _, operator := range []string{"===", "~=", "==", "!=", ">=", "<=", ">", "<", " not in ", " in "} {
		idx := strings.Index(expr, operator)
		if idx < 0 {
			continue
		}
		lhs := stripQuotes(strings.TrimSpace(expr[:idx]))
		rhs := stripQuotes(strings.TrimSpace(expr[idx+len(operator):]))
		trimmedOp := strings.TrimSpace(operator)

		// Figure out which side is the key vs the value. If both look like
		// values, the left-hand side is treated as the key.
		if !isMarkerVar(lhs) && isMarkerVar(rhs) {
			return rhs, flipOp(trimmedOp), lhs
		}
		return lhs, trimmedOp, rhs
	}
	return "", "", ""
}

// stripQuotes removes one pair of matching single or double quotes that
// encloses s; any other input is returned unchanged.
func stripQuotes(s string) string {
	if len(s) < 2 {
		return s
	}
	first, last := s[0], s[len(s)-1]
	if first == last && (first == '"' || first == '\'') {
		return s[1 : len(s)-1]
	}
	return s
}

// isMarkerVar reports whether s is one of the PEP 508 environment marker
// variable names recognised by this package.
func isMarkerVar(s string) bool {
	switch s {
	case "os_name", "sys_platform", "platform_machine", "platform_python_implementation",
		"platform_release", "platform_system", "platform_version",
		"python_version", "python_full_version", "implementation_name",
		"implementation_version", "extra":
		return true
	}
	return false
}

// flipOp returns the operator that keeps a comparison equivalent once its
// operands have been swapped. Ordering operators are mirrored, and the
// substring tests "in" / "not in" become "contains" / "not contains", which
// matchMarkerOp evaluates in the opposite direction. Symmetric operators are
// returned unchanged.
func flipOp(op string) string {
	switch op {
	case ">":
		return "<"
	case "<":
		return ">"
	case ">=":
		return "<="
	case "<=":
		return ">="
	case "in":
		return "contains"
	case "not in":
		return "not contains"
	}
	return op
}

// matchMarkerOp evaluates "actual op expected", where actual is the value of
// a marker variable in the target environment and expected is the literal
// from the marker expression.
//
// Ordering operators compare the two strings lexicographically, not as
// versions. "in"/"not in" test whether actual is a substring of expected;
// "contains"/"not contains" test whether expected is a substring of actual.
// Any operator not listed here (including "~=") is treated as satisfied.
func matchMarkerOp(actual, op, expected string) bool {
	switch op {
	case "==", "===":
		return actual == expected
	case "!=":
		return actual != expected
	case "in":
		return strings.Contains(expected, actual)
	case "not in":
		return !strings.Contains(expected, actual)
	case "contains":
		return strings.Contains(actual, expected)
	case "not contains":
		return !strings.Contains(actual, expected)
	case ">=":
		return actual >= expected
	case "<=":
		return actual <= expected
	case ">":
		return actual > expected
	case "<":
		return actual < expected
	default:
		return true
	}
}

// verifyChecksum
verifies the SHA256 checksum of data against the expected value. +func verifyChecksum(data []byte, expected string) error { + if expected == "" { + return nil + } + + prefix := "sha256:" + if !strings.HasPrefix(expected, prefix) { + return fmt.Errorf("unsupported checksum format: %s", expected) + } + expectedHex := expected[len(prefix):] + + h := sha256.Sum256(data) + actualHex := hex.EncodeToString(h[:]) + + if actualHex != expectedHex { + return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedHex, actualHex) + } + + return nil +} diff --git a/pkg/ecosystem/python/wheel_test.go b/pkg/ecosystem/python/wheel_test.go new file mode 100644 index 000000000..5323da44a --- /dev/null +++ b/pkg/ecosystem/python/wheel_test.go @@ -0,0 +1,158 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package python + +import ( + "archive/zip" + "bytes" + "crypto/sha256" + "encoding/hex" + "testing" + + apkfs "chainguard.dev/apko/pkg/apk/fs" +) + +func createTestWheel(t *testing.T, files map[string]string) []byte { + t.Helper() + var buf bytes.Buffer + w := zip.NewWriter(&buf) + for name, content := range files { + f, err := w.Create(name) + if err != nil { + t.Fatalf("creating file in zip: %v", err) + } + if _, err := f.Write([]byte(content)); err != nil { + t.Fatalf("writing file in zip: %v", err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("closing zip: %v", err) + } + return buf.Bytes() +} + +func TestExtractWheel(t *testing.T) { + wheelData := createTestWheel(t, map[string]string{ + "mypackage/__init__.py": "# init", + "mypackage/module.py": "def hello(): pass", + "mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n", + "mypackage-1.0.0.dist-info/RECORD": "", + }) + + fs := apkfs.NewMemFS() + if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + + err := extractWheel(fs, wheelData, "usr/lib/python3.12/site-packages") + if err != nil { + t.Fatalf("extractWheel() error: %v", err) + } + + // Check that files were extracted + data, err := fs.ReadFile("usr/lib/python3.12/site-packages/mypackage/__init__.py") + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(data) != "# init" { + t.Errorf("content = %q, want %q", string(data), "# init") + } + + data, err = fs.ReadFile("usr/lib/python3.12/site-packages/mypackage/module.py") + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(data) != "def hello(): pass" { + t.Errorf("content = %q, want %q", string(data), "def hello(): pass") + } +} + +func TestWriteInstallerFile(t *testing.T) { + wheelData := createTestWheel(t, map[string]string{ + "mypackage/__init__.py": "# init", + "mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n", + }) + + fs := apkfs.NewMemFS() + if err := 
fs.MkdirAll("usr/lib/python3.12/site-packages/mypackage-1.0.0.dist-info", 0755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + + err := writeInstallerFile(fs, "usr/lib/python3.12/site-packages", wheelData) + if err != nil { + t.Fatalf("writeInstallerFile() error: %v", err) + } + + data, err := fs.ReadFile("usr/lib/python3.12/site-packages/mypackage-1.0.0.dist-info/INSTALLER") + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(data) != "apko\n" { + t.Errorf("INSTALLER content = %q, want %q", string(data), "apko\n") + } +} + +func TestEvaluateMarkers(t *testing.T) { + tests := []struct { + name string + markers string + extras []string + want bool + }{ + {"no markers", "", nil, true}, + {"extra not requested", `extra == "dev"`, nil, false}, + {"extra requested", `extra == "dev"`, []string{"dev"}, true}, + {"wrong extra", `extra == "dev"`, []string{"test"}, false}, + {"os_name posix", `os_name == "posix"`, nil, true}, + {"os_name nt", `os_name == "nt"`, nil, false}, + {"sys_platform linux", `sys_platform == "linux"`, nil, true}, + {"sys_platform win32", `sys_platform == "win32"`, nil, false}, + {"platform_system Linux", `platform_system == "Linux"`, nil, true}, + {"python_version", `python_version >= "3.8"`, nil, true}, + {"compound and true", `python_version >= "3.8" and os_name == "posix"`, nil, true}, + {"compound and false", `os_name == "nt" and python_version >= "3.8"`, nil, false}, + {"compound or true", `os_name == "nt" or os_name == "posix"`, nil, true}, + {"compound or false", `os_name == "nt" or sys_platform == "win32"`, nil, false}, + {"extra and platform", `extra == "dev" and os_name == "posix"`, []string{"dev"}, true}, + {"extra and wrong platform", `extra == "dev" and os_name == "nt"`, []string{"dev"}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := evaluateMarkers(tt.markers, tt.extras) + if got != tt.want { + t.Errorf("evaluateMarkers(%q, %v) = %v, want %v", tt.markers, tt.extras, got, 
tt.want) + } + }) + } +} + +func TestVerifyChecksum(t *testing.T) { + data := []byte("hello world") + h := sha256.Sum256(data) + validChecksum := "sha256:" + hex.EncodeToString(h[:]) + + if err := verifyChecksum(data, validChecksum); err != nil { + t.Errorf("verifyChecksum() with valid checksum: %v", err) + } + + if err := verifyChecksum(data, "sha256:0000000000000000000000000000000000000000000000000000000000000000"); err == nil { + t.Error("verifyChecksum() with invalid checksum should return error") + } + + if err := verifyChecksum(data, ""); err != nil { + t.Error("verifyChecksum() with empty checksum should return nil") + } +} diff --git a/pkg/lock/lock.go b/pkg/lock/lock.go index aceb4e395..48f449b88 100644 --- a/pkg/lock/lock.go +++ b/pkg/lock/lock.go @@ -29,7 +29,18 @@ type LockContents struct { RuntimeOnlyRepositories []LockRepo `json:"runtime_repositories"` Repositories []LockRepo `json:"repositories"` // Packages in order of installation -> for a single architecture. - Packages []LockPkg `json:"packages"` + Packages []LockPkg `json:"packages"` + EcosystemPackages []LockEcosystemPkg `json:"ecosystem_packages,omitempty"` +} + +// LockEcosystemPkg represents a locked non-APK ecosystem package. +type LockEcosystemPkg struct { + Ecosystem string `json:"ecosystem"` + Name string `json:"name"` + Version string `json:"version"` + URL string `json:"url"` + Checksum string `json:"checksum"` + Architecture string `json:"architecture"` } type LockPkg struct { diff --git a/pkg/tarfs/fs.go b/pkg/tarfs/fs.go index c37756b54..cb14b6ca9 100644 --- a/pkg/tarfs/fs.go +++ b/pkg/tarfs/fs.go @@ -59,6 +59,29 @@ type tarEntry struct { type memFS struct { tree *node + + // currentOwner is the owner name to stamp on new nodes. + // Set via SetCurrentOwner during ecosystem package installation. + currentOwner string + + // ownerSizes tracks the cumulative bytes written per owner, + // used to estimate installed size for layering budget decisions. 
+ ownerSizes map[string]uint64 +} + +// SetCurrentOwner sets the owner name for any new filesystem nodes +// created via MkdirAll, WriteFile, Symlink, etc. Pass "" to clear. +// This is used by ecosystem package installers to tag files for layering. +func (m *memFS) SetCurrentOwner(owner string) { + m.currentOwner = owner +} + +// OwnerSize returns the total bytes written for the given owner. +func (m *memFS) OwnerSize(owner string) uint64 { + if m.ownerSizes == nil { + return 0 + } + return m.ownerSizes[owner] } func New() *memFS { @@ -267,6 +290,7 @@ func (m *memFS) Mkdir(path string, perms fs.FileMode) error { children: map[string]*node{}, xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, + owner: m.currentOwner, } return nil } @@ -316,6 +340,7 @@ func (m *memFS) MkdirAll(path string, perm fs.FileMode) error { children: map[string]*node{}, xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, + owner: m.currentOwner, } anode.children[part] = newnode } @@ -386,6 +411,7 @@ func (m *memFS) openFile(name string, flag int, perm fs.FileMode, linkCount int) dir: false, xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, + owner: m.currentOwner, } parentAnode.children[base] = anode } @@ -599,6 +625,7 @@ func (m *memFS) Mknod(path string, mode uint32, dev int) error { xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, modTime: anode.modTime, + owner: m.currentOwner, } return nil @@ -677,6 +704,7 @@ func (m *memFS) Symlink(oldname, newname string) error { xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, modTime: anode.modTime, + owner: m.currentOwner, } return nil } @@ -949,6 +977,15 @@ func (f *memFile) Write(p []byte) (n int, err error) { copy(f.node.data[f.offset:], p) } f.offset += int64(len(p)) + + // Track installed size per owner for ecosystem package layering. 
+ if f.node.owner != "" && f.fs != nil { + if f.fs.ownerSizes == nil { + f.fs.ownerSizes = map[string]uint64{} + } + f.fs.ownerSizes[f.node.owner] += uint64(len(p)) + } + return len(p), nil } @@ -970,6 +1007,10 @@ type node struct { // This stores metadata for a tarfs-backed file. te *tarEntry + + // owner is set for files created by ecosystem package installers + // to track ownership for layering purposes. + owner string } func (n *node) fileInfo(parent, name string) fs.FileInfo { @@ -1035,3 +1076,13 @@ func (m *memFileInfo) Package() *apk.Package { return m.te.pkg } + +// Owner returns the name of the owner of this file. +// For APK-installed files, this is the package name. +// For ecosystem-installed files, this is the owner string set during install. +func (m *memFileInfo) Owner() string { + if m.te != nil && m.te.pkg != nil { + return m.te.pkg.Name + } + return m.owner +}