From 92e4e4e8bb64f881d7f6a488962810b3db74c004 Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Fri, 3 Apr 2026 13:45:54 -0600 Subject: [PATCH 01/12] feat: add multi-ecosystem package support (Python/PyPI) Add a declarative ecosystem package system that allows installing packages from non-APK ecosystems (starting with Python/PyPI) directly into OCI images without shelling out to pip or any other tool. Packages are resolved via the PEP 503 Simple Repository API, downloaded as wheels, and extracted directly into the filesystem. The new `ecosystems.python` config block supports custom indexes, version constraints, and auto-detection of the installed Python version. Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/cli/build.go | 6 + internal/cli/lock.go | 26 ++ pkg/build/build_implementation.go | 10 + pkg/build/options.go | 32 +++ pkg/build/types/image_configuration.go | 26 ++ pkg/build/types/types.go | 19 ++ pkg/ecosystem/ecosystem.go | 88 ++++++ pkg/ecosystem/pip/pip.go | 140 ++++++++++ pkg/ecosystem/pip/pip_test.go | 68 +++++ pkg/ecosystem/pip/platform.go | 222 +++++++++++++++ pkg/ecosystem/pip/platform_test.go | 202 ++++++++++++++ pkg/ecosystem/pip/resolve.go | 371 +++++++++++++++++++++++++ pkg/ecosystem/pip/resolve_test.go | 183 ++++++++++++ pkg/ecosystem/pip/wheel.go | 112 ++++++++ pkg/ecosystem/pip/wheel_test.go | 123 ++++++++ pkg/lock/lock.go | 21 +- 16 files changed, 1644 insertions(+), 5 deletions(-) create mode 100644 pkg/ecosystem/ecosystem.go create mode 100644 pkg/ecosystem/pip/pip.go create mode 100644 pkg/ecosystem/pip/pip_test.go create mode 100644 pkg/ecosystem/pip/platform.go create mode 100644 pkg/ecosystem/pip/platform_test.go create mode 100644 pkg/ecosystem/pip/resolve.go create mode 100644 pkg/ecosystem/pip/resolve_test.go create mode 100644 pkg/ecosystem/pip/wheel.go create mode 100644 pkg/ecosystem/pip/wheel_test.go diff --git a/internal/cli/build.go b/internal/cli/build.go index 946623846..86b17661f 100644 --- a/internal/cli/build.go 
+++ b/internal/cli/build.go @@ -60,6 +60,8 @@ func buildCmd() *cobra.Command { var includePaths []string var ignoreSignatures bool var sizeLimits options.SizeLimits + var extraPythonPackages []string + var extraPythonIndexes []string cmd := &cobra.Command{ Use: "build", @@ -119,6 +121,8 @@ Along the image, apko will generate SBOMs (software bill of materials) describin build.WithIncludePaths(includePaths), build.WithIgnoreSignatures(ignoreSignatures), build.WithSizeLimits(sizeLimits), + build.WithExtraEcosystemPackages("python", extraPythonPackages), + build.WithExtraEcosystemIndexes("python", extraPythonIndexes), ) }, } @@ -139,6 +143,8 @@ Along the image, apko will generate SBOMs (software bill of materials) describin cmd.Flags().StringVar(&lockfile, "lockfile", "", "a path to .lock.json file (e.g. produced by apko lock) that constraints versions of packages to the listed ones (default '' means no additional constraints)") cmd.Flags().StringSliceVar(&includePaths, "include-paths", []string{}, "Additional include paths where to look for input files (config, base image, etc.). By default apko will search for paths only in workdir. Include paths may be absolute, or relative. Relative paths are interpreted relative to workdir. 
For adding extra paths for packages, use --repository-append.") cmd.Flags().BoolVar(&ignoreSignatures, "ignore-signatures", false, "ignore repository signature verification") + cmd.Flags().StringSliceVar(&extraPythonPackages, "ecosystem-python-package-append", []string{}, "extra Python packages to include (e.g., flask==3.0.0)") + cmd.Flags().StringSliceVar(&extraPythonIndexes, "ecosystem-python-index-append", []string{}, "extra Python package index URLs to use") addClientLimitFlags(cmd, &sizeLimits) return cmd } diff --git a/internal/cli/lock.go b/internal/cli/lock.go index 67c10d6ff..c05348843 100644 --- a/internal/cli/lock.go +++ b/internal/cli/lock.go @@ -35,6 +35,8 @@ import ( apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/build" "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" + _ "chainguard.dev/apko/pkg/ecosystem/pip" pkglock "chainguard.dev/apko/pkg/lock" ) @@ -245,6 +247,30 @@ func LockCmd(ctx context.Context, output string, archs []types.Architecture, opt } } + // Resolve ecosystem packages + for name, ecoConfig := range ic.Contents.Ecosystems { + installer, ok := ecosystem.Get(name) + if !ok { + return fmt.Errorf("unknown ecosystem: %s", name) + } + for _, arch := range archs { + resolved, err := installer.Resolve(ctx, ecoConfig, arch) + if err != nil { + return fmt.Errorf("resolving %s packages for %s: %w", name, arch, err) + } + for _, pkg := range resolved { + lock.Contents.EcosystemPackages = append(lock.Contents.EcosystemPackages, pkglock.LockEcosystemPkg{ + Ecosystem: pkg.Ecosystem, + Name: pkg.Name, + Version: pkg.Version, + URL: pkg.URL, + Checksum: pkg.Checksum, + Architecture: arch.ToAPK(), + }) + } + } + } + // Sort keyrings by name for reproducible lock files sort.Slice(lock.Contents.Keyrings, func(i, j int) bool { return lock.Contents.Keyrings[i].Name < lock.Contents.Keyrings[j].Name diff --git a/pkg/build/build_implementation.go b/pkg/build/build_implementation.go index 2200af5f6..63f702835 100644 
--- a/pkg/build/build_implementation.go +++ b/pkg/build/build_implementation.go @@ -36,6 +36,8 @@ import ( ldsocache "chainguard.dev/apko/internal/ldso-cache" "chainguard.dev/apko/pkg/apk/apk" apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/ecosystem" + _ "chainguard.dev/apko/pkg/ecosystem/pip" // Register pip ecosystem installer. "chainguard.dev/apko/pkg/lock" "chainguard.dev/apko/pkg/options" ) @@ -177,6 +179,14 @@ func (bc *Context) buildImage(ctx context.Context) ([]apk.InstalledDiff, error) } } + // Install ecosystem packages (pip, etc.) after APK packages so that + // the language runtime is available for version detection. + if len(bc.ic.Contents.Ecosystems) > 0 { + if err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch); err != nil { + return nil, fmt.Errorf("installing ecosystem packages: %w", err) + } + } + // For now adding additional accounts is banned when using base image. On the other hand, we don't want to // wipe out the users set in base. // If one wants to add a support for adding additional users they would need to look into this piece of code. diff --git a/pkg/build/options.go b/pkg/build/options.go index 0e25edd0c..85a9a1d7d 100644 --- a/pkg/build/options.go +++ b/pkg/build/options.go @@ -266,3 +266,35 @@ func WithSizeLimits(limits options.SizeLimits) Option { return nil } } + +// WithExtraEcosystemPackages adds extra ecosystem packages to the build. +func WithExtraEcosystemPackages(ecosystem string, packages []string) Option { + return func(bc *Context) error { + if len(packages) == 0 { + return nil + } + if bc.ic.Contents.Ecosystems == nil { + bc.ic.Contents.Ecosystems = make(map[string]types.EcosystemConfig) + } + eco := bc.ic.Contents.Ecosystems[ecosystem] + eco.Packages = append(eco.Packages, packages...) + bc.ic.Contents.Ecosystems[ecosystem] = eco + return nil + } +} + +// WithExtraEcosystemIndexes adds extra ecosystem indexes to the build. 
+func WithExtraEcosystemIndexes(ecosystem string, indexes []string) Option { + return func(bc *Context) error { + if len(indexes) == 0 { + return nil + } + if bc.ic.Contents.Ecosystems == nil { + bc.ic.Contents.Ecosystems = make(map[string]types.EcosystemConfig) + } + eco := bc.ic.Contents.Ecosystems[ecosystem] + eco.Indexes = append(eco.Indexes, indexes...) + bc.ic.Contents.Ecosystems[ecosystem] = eco + return nil + } +} diff --git a/pkg/build/types/image_configuration.go b/pkg/build/types/image_configuration.go index 245452a1f..cb744668b 100644 --- a/pkg/build/types/image_configuration.go +++ b/pkg/build/types/image_configuration.go @@ -181,6 +181,24 @@ func (i *ImageContents) MergeInto(target *ImageContents) error { if target.BaseImage == nil { target.BaseImage = i.BaseImage } + // Merge ecosystem configs + if len(i.Ecosystems) > 0 { + if target.Ecosystems == nil { + target.Ecosystems = make(map[string]EcosystemConfig) + } + for name, eco := range i.Ecosystems { + if existing, ok := target.Ecosystems[name]; ok { + existing.Indexes = slices.Concat(eco.Indexes, existing.Indexes) + existing.Packages = slices.Concat(eco.Packages, existing.Packages) + if existing.PythonVersion == "" { + existing.PythonVersion = eco.PythonVersion + } + target.Ecosystems[name] = existing + } else { + target.Ecosystems[name] = eco + } + } + } return nil } @@ -295,6 +313,14 @@ func (ic *ImageConfiguration) Summarize(ctx context.Context) { log.Infof(" - gid=%d(%s) members=%v", g.GID, g.GroupName, g.Members) } } + if len(ic.Contents.Ecosystems) > 0 { + log.Infof(" ecosystems:") + for name, eco := range ic.Contents.Ecosystems { + log.Infof(" %s:", name) + log.Infof(" indexes: %v", eco.Indexes) + log.Infof(" packages: %v", eco.Packages) + } + } if len(ic.Annotations) > 0 { log.Infof(" annotations:") for k, v := range ic.Annotations { diff --git a/pkg/build/types/types.go b/pkg/build/types/types.go index e920acc39..b569041f6 100644 --- a/pkg/build/types/types.go +++ b/pkg/build/types/types.go 
// EcosystemConfig holds configuration for a non-APK package ecosystem (e.g., pip).
// One value is stored per ecosystem name in ImageContents.Ecosystems.
type EcosystemConfig struct {
	// Indexes is a list of package index URLs (e.g., PyPI simple API URLs).
	// Omitted from JSON/YAML output when empty.
	Indexes []string `json:"indexes,omitempty" yaml:"indexes,omitempty"`
	// Packages is a list of package specifications (e.g., "flask==3.0.0").
	// Omitted from JSON/YAML output when empty.
	Packages []string `json:"packages,omitempty" yaml:"packages,omitempty"`
	// PythonVersion overrides auto-detection of the Python version (e.g., "3.12").
	// NOTE(review): the field is Python-specific although the struct is shared
	// by every ecosystem; presumably other ecosystems ignore it — confirm.
	PythonVersion string `json:"python_version,omitempty" yaml:"python_version,omitempty"`
}
// Installer is the interface that ecosystem package installers must implement.
// Implementations are made available to callers through Register and Get.
type Installer interface {
	// Name returns the ecosystem name (e.g., "python").
	Name() string
	// Resolve resolves the requested packages to specific versions and URLs
	// for the given target architecture. It returns one ResolvedPackage per
	// artifact to install, or an error if resolution fails.
	Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture) ([]ResolvedPackage, error)
	// Install extracts resolved packages into the filesystem. The packages
	// are expected to be the output of Resolve.
	Install(ctx context.Context, fs apkfs.FullFS, packages []ResolvedPackage) error
}
+func Get(name string) (Installer, bool) { + registryMu.RLock() + defer registryMu.RUnlock() + factory, ok := registry[name] + if !ok { + return nil, false + } + return factory(), true +} + +// InstallAll installs packages for all configured ecosystems. +func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture) error { + for name, config := range ecosystems { + installer, ok := Get(name) + if !ok { + return fmt.Errorf("unknown ecosystem: %s", name) + } + resolved, err := installer.Resolve(ctx, config, arch) + if err != nil { + return fmt.Errorf("resolving %s packages: %w", name, err) + } + if len(resolved) == 0 { + continue + } + if err := installer.Install(ctx, fs, resolved); err != nil { + return fmt.Errorf("installing %s packages: %w", name, err) + } + } + return nil +} diff --git a/pkg/ecosystem/pip/pip.go b/pkg/ecosystem/pip/pip.go new file mode 100644 index 000000000..c5cd4487a --- /dev/null +++ b/pkg/ecosystem/pip/pip.go @@ -0,0 +1,140 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package pip + +import ( + "context" + "fmt" + "io" + "net/http" + "strings" + + "github.com/chainguard-dev/clog" + + apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" +) + +func init() { + ecosystem.Register("python", func() ecosystem.Installer { + return &installer{} + }) +} + +type installer struct{} + +func (i *installer) Name() string { return "python" } + +func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture) ([]ecosystem.ResolvedPackage, error) { + if len(config.Packages) == 0 { + return nil, nil + } + + specs := make([]packageSpec, 0, len(config.Packages)) + for _, pkg := range config.Packages { + specs = append(specs, parsePackageSpec(pkg)) + } + + indexes := config.Indexes + if len(indexes) == 0 { + indexes = []string{defaultIndex} + } + + // We need a Python version to filter wheels. We'll use a default that + // callers can override via the config, or detect later during install. 
+ pythonVersion := config.PythonVersion + if pythonVersion == "" { + pythonVersion = "3.12" + } + + return resolvePackages(ctx, specs, indexes, pythonVersion, arch) +} + +func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []ecosystem.ResolvedPackage) error { + log := clog.FromContext(ctx) + + pythonVersion := detectPythonVersion(fsys) + if pythonVersion == "" { + return fmt.Errorf("no Python installation found in filesystem; install python3 via APK first") + } + log.Infof("detected Python %s for pip ecosystem install", pythonVersion) + + sitePackagesPath := fmt.Sprintf("usr/lib/python%s/site-packages", pythonVersion) + if err := fsys.MkdirAll(sitePackagesPath, 0755); err != nil { + return fmt.Errorf("creating site-packages directory: %w", err) + } + + for _, pkg := range packages { + log.Infof("installing pip package %s==%s", pkg.Name, pkg.Version) + + data, err := downloadWheel(ctx, pkg.URL) + if err != nil { + return fmt.Errorf("downloading %s: %w", pkg.Name, err) + } + + if err := verifyChecksum(data, pkg.Checksum); err != nil { + return fmt.Errorf("verifying %s: %w", pkg.Name, err) + } + + if err := extractWheel(fsys, data, sitePackagesPath); err != nil { + return fmt.Errorf("extracting %s: %w", pkg.Name, err) + } + + if err := writeInstallerFile(fsys, sitePackagesPath, data); err != nil { + log.Debugf("could not write INSTALLER file for %s: %v", pkg.Name, err) + } + } + + return nil +} + +// detectPythonVersion scans the filesystem for a Python installation and +// returns the version string (e.g., "3.12"). +func detectPythonVersion(fsys apkfs.FullFS) string { + entries, err := fsys.ReadDir("usr/lib") + if err != nil { + return "" + } + + for _, entry := range entries { + name := entry.Name() + if strings.HasPrefix(name, "python3.") && entry.IsDir() { + return strings.TrimPrefix(name, "python") + } + } + + return "" +} + +// downloadWheel downloads a wheel file from the given URL. 
// downloadWheel fetches url with an HTTP GET bound to ctx and returns the
// whole response body.
//
// It returns an error if the request cannot be built, if the transport
// fails, or if the server answers with any status other than 200 OK. The
// body is read fully into memory.
func downloadWheel(ctx context.Context, url string) ([]byte, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return nil, err
	}

	response, err := http.DefaultClient.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	switch response.StatusCode {
	case http.StatusOK:
		return io.ReadAll(response.Body)
	default:
		return nil, fmt.Errorf("HTTP %d downloading %s", response.StatusCode, url)
	}
}
+ +package pip + +import ( + "testing" + + apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/ecosystem" +) + +func TestInstallerRegistration(t *testing.T) { + inst, ok := ecosystem.Get("python") + if !ok { + t.Fatal("python installer not registered") + } + if inst.Name() != "python" { + t.Errorf("Name() = %q, want %q", inst.Name(), "python") + } +} + +func TestDetectPythonVersion(t *testing.T) { + fs := apkfs.NewMemFS() + + // No python installed + if v := detectPythonVersion(fs); v != "" { + t.Errorf("detectPythonVersion() = %q on empty fs, want empty", v) + } + + // Create python directory + if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil { + t.Fatal(err) + } + + v := detectPythonVersion(fs) + if v != "3.12" { + t.Errorf("detectPythonVersion() = %q, want %q", v, "3.12") + } +} + +func TestDetectPythonVersionMultiple(t *testing.T) { + fs := apkfs.NewMemFS() + + // Create multiple python versions - should return whichever is found first + if err := fs.MkdirAll("usr/lib/python3.11/site-packages", 0755); err != nil { + t.Fatal(err) + } + if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil { + t.Fatal(err) + } + + v := detectPythonVersion(fs) + if v != "3.11" && v != "3.12" { + t.Errorf("detectPythonVersion() = %q, want 3.11 or 3.12", v) + } +} diff --git a/pkg/ecosystem/pip/platform.go b/pkg/ecosystem/pip/platform.go new file mode 100644 index 000000000..54d1c7c14 --- /dev/null +++ b/pkg/ecosystem/pip/platform.go @@ -0,0 +1,222 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pip + +import ( + "fmt" + "strings" + + "chainguard.dev/apko/pkg/build/types" +) + +// platformTags returns the list of compatible wheel platform tags for the +// given architecture, ordered from most specific to least specific. +func platformTags(arch types.Architecture) []string { + switch arch { + case types.ParseArchitecture("amd64"): + return []string{ + "manylinux_2_17_x86_64", + "manylinux2014_x86_64", + "manylinux_2_5_x86_64", + "manylinux1_x86_64", + "linux_x86_64", + } + case types.ParseArchitecture("arm64"): + return []string{ + "manylinux_2_17_aarch64", + "manylinux2014_aarch64", + "linux_aarch64", + } + case types.ParseArchitecture("arm/v7"): + return []string{ + "manylinux_2_17_armv7l", + "manylinux2014_armv7l", + "linux_armv7l", + } + case types.ParseArchitecture("arm/v6"): + return []string{ + "manylinux_2_17_armv6l", + "linux_armv6l", + } + case types.ParseArchitecture("386"): + return []string{ + "manylinux_2_17_i686", + "manylinux2014_i686", + "manylinux_2_5_i686", + "manylinux1_i686", + "linux_i686", + } + case types.ParseArchitecture("ppc64le"): + return []string{ + "manylinux_2_17_ppc64le", + "manylinux2014_ppc64le", + "linux_ppc64le", + } + case types.ParseArchitecture("s390x"): + return []string{ + "manylinux_2_17_s390x", + "manylinux2014_s390x", + "linux_s390x", + } + case types.ParseArchitecture("riscv64"): + return []string{ + "manylinux_2_17_riscv64", + "linux_riscv64", + } + default: + return []string{"any"} + } +} + +// wheelFileParts holds the parsed components of a wheel filename per PEP 427. 
// isCompatiblePythonTag reports whether a wheel's python tag admits the given
// interpreter version.
//
// tag is the python-tag component of a wheel filename and may be a compressed
// set joined by "." (e.g., "py2.py3"). pythonVersion is "major.minor" (e.g.,
// "3.12"). The tag is accepted when any member of the set is one of:
//
//   - "py3", the generic Python 3 tag;
//   - "py" + version digits (e.g., "py312"), the generic tag for exactly this
//     version;
//   - "cp" + version digits (e.g., "cp312"), the CPython tag for exactly this
//     version.
func isCompatiblePythonTag(tag, pythonVersion string) bool {
	digits := strings.ReplaceAll(pythonVersion, ".", "")
	pyTag, cpTag := "py"+digits, "cp"+digits

	// Members are compared one by one, so a set such as "py2.py3" is
	// accepted through its "py3" member.
	for _, t := range strings.Split(tag, ".") {
		switch t {
		case "py3", pyTag, cpTag:
			return true
		}
	}
	return false
}
+func isCompatibleABI(tag, pythonVersion string) bool { + if tag == "none" { + return true + } + cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") + for _, t := range strings.Split(tag, ".") { + if t == "abi3" || t == cpTag { + return true + } + } + return false +} + +// isCompatiblePlatform checks if the wheel's platform tag is compatible. +func isCompatiblePlatform(tag string, arch types.Architecture) bool { + if tag == "any" { + return true + } + compatible := platformTags(arch) + for _, t := range strings.Split(tag, ".") { + for _, c := range compatible { + if t == c { + return true + } + } + } + return false +} + +// wheelScore returns a priority score for the wheel. Higher is better. +// Binary wheels for the exact platform are preferred over pure-Python wheels. +func wheelScore(w wheelFileParts, pythonVersion string, arch types.Architecture) int { + score := 0 + + // Prefer exact CPython tag over generic py3 + cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") + for _, t := range strings.Split(w.PythonTag, ".") { + if t == cpTag { + score += 100 + break + } + } + + // Prefer specific ABI over none/abi3 + for _, t := range strings.Split(w.ABITag, ".") { + if t == cpTag { + score += 50 + } else if t == "abi3" { + score += 25 + } + } + + // Prefer specific platform over any + if w.PlatformTag != "any" { + platTags := platformTags(arch) + for i, pt := range platTags { + for _, t := range strings.Split(w.PlatformTag, ".") { + if t == pt { + // More specific platforms (earlier in list) get higher scores + score += 10 * (len(platTags) - i) + break + } + } + } + } + + return score +} diff --git a/pkg/ecosystem/pip/platform_test.go b/pkg/ecosystem/pip/platform_test.go new file mode 100644 index 000000000..e96edd4d7 --- /dev/null +++ b/pkg/ecosystem/pip/platform_test.go @@ -0,0 +1,202 @@ +// Copyright 2024 Chainguard, Inc. 
// contains reports whether substr occurs within s. An empty substr is
// contained in every string, including the empty one.
func contains(s, substr string) bool {
	switch {
	case len(substr) > len(s):
		return false
	case s == substr:
		return true
	case len(s) == 0:
		return false
	}
	return containsSubstr(s, substr)
}

// containsSubstr scans s for substr by comparing every window of
// len(substr) bytes. It returns false when substr is longer than s.
func containsSubstr(s, substr string) bool {
	width := len(substr)
	for start := 0; start+width <= len(s); start++ {
		if s[start:start+width] == substr {
			return true
		}
	}
	return false
}
+ wantVer: "3.0.0", + wantPy: "py3", + wantABI: "none", + wantPlat: "any", + }, + { + filename: "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.whl", + wantDist: "numpy", + wantVer: "1.26.0", + wantPy: "cp312", + wantABI: "cp312", + wantPlat: "manylinux_2_17_x86_64", + }, + { + filename: "notawheel.tar.gz", + wantErr: true, + }, + { + filename: "bad-name.whl", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.filename, func(t *testing.T) { + parts, err := parseWheelFilename(tt.filename) + if tt.wantErr { + if err == nil { + t.Error("expected error, got nil") + } + return + } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if parts.Distribution != tt.wantDist { + t.Errorf("Distribution = %q, want %q", parts.Distribution, tt.wantDist) + } + if parts.Version != tt.wantVer { + t.Errorf("Version = %q, want %q", parts.Version, tt.wantVer) + } + if parts.PythonTag != tt.wantPy { + t.Errorf("PythonTag = %q, want %q", parts.PythonTag, tt.wantPy) + } + if parts.ABITag != tt.wantABI { + t.Errorf("ABITag = %q, want %q", parts.ABITag, tt.wantABI) + } + if parts.PlatformTag != tt.wantPlat { + t.Errorf("PlatformTag = %q, want %q", parts.PlatformTag, tt.wantPlat) + } + }) + } +} + +func TestIsCompatibleWheel(t *testing.T) { + tests := []struct { + name string + wheel wheelFileParts + pyVer string + arch string + want bool + }{ + { + name: "pure python wheel is always compatible", + wheel: wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"}, + pyVer: "3.12", + arch: "amd64", + want: true, + }, + { + name: "cpython binary for matching arch", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"}, + pyVer: "3.12", + arch: "amd64", + want: true, + }, + { + name: "cpython binary for wrong arch", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_aarch64"}, + pyVer: "3.12", + arch: "amd64", + want: false, + }, + { + name: "wrong python version", + wheel: 
wheelFileParts{PythonTag: "cp311", ABITag: "cp311", PlatformTag: "any"}, + pyVer: "3.12", + arch: "amd64", + want: false, + }, + { + name: "abi3 is compatible", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "abi3", PlatformTag: "manylinux_2_17_x86_64"}, + pyVer: "3.12", + arch: "amd64", + want: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := isCompatibleWheel(tt.wheel, tt.pyVer, types.ParseArchitecture(tt.arch)) + if got != tt.want { + t.Errorf("isCompatibleWheel() = %v, want %v", got, tt.want) + } + }) + } +} + +func TestWheelScore(t *testing.T) { + pureWheel := wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"} + binaryWheel := wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"} + + pureScore := wheelScore(pureWheel, "3.12", types.ParseArchitecture("amd64")) + binaryScore := wheelScore(binaryWheel, "3.12", types.ParseArchitecture("amd64")) + + if binaryScore <= pureScore { + t.Errorf("binary wheel score (%d) should be higher than pure wheel score (%d)", binaryScore, pureScore) + } +} diff --git a/pkg/ecosystem/pip/resolve.go b/pkg/ecosystem/pip/resolve.go new file mode 100644 index 000000000..1c08c272c --- /dev/null +++ b/pkg/ecosystem/pip/resolve.go @@ -0,0 +1,371 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package pip + +import ( + "context" + "fmt" + "io" + "net/http" + "regexp" + "strings" + + "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" + + "github.com/chainguard-dev/clog" +) + +const defaultIndex = "https://pypi.org/simple/" + +// packageSpec represents a parsed package requirement (e.g., "flask==3.0.0"). +type packageSpec struct { + Name string + Operator string // "==", ">=", "<=", "!=", "~=", "" + Version string + Extras []string + Markers string +} + +// parsePackageSpec parses a PEP 508-style requirement string. +func parsePackageSpec(spec string) packageSpec { + ps := packageSpec{} + + // Strip environment markers + if idx := strings.Index(spec, ";"); idx != -1 { + ps.Markers = strings.TrimSpace(spec[idx+1:]) + spec = strings.TrimSpace(spec[:idx]) + } + + // Strip extras + if lbIdx := strings.Index(spec, "["); lbIdx != -1 { + if rbIdx := strings.Index(spec, "]"); rbIdx != -1 { + extras := spec[lbIdx+1 : rbIdx] + ps.Extras = strings.Split(extras, ",") + for i := range ps.Extras { + ps.Extras[i] = strings.TrimSpace(ps.Extras[i]) + } + spec = spec[:lbIdx] + spec[rbIdx+1:] + } + } + + spec = strings.TrimSpace(spec) + + for _, op := range []string{"~=", "==", "!=", ">=", "<=", ">", "<"} { + if idx := strings.Index(spec, op); idx != -1 { + ps.Name = strings.TrimSpace(spec[:idx]) + ps.Operator = op + ps.Version = strings.TrimSpace(spec[idx+len(op):]) + return ps + } + } + + ps.Name = spec + return ps +} + +// normalizeName normalizes a Python package name per PEP 503. +func normalizeName(name string) string { + return strings.ToLower(regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")) +} + +// wheelLink represents a parsed link from a PEP 503 Simple API response. +type wheelLink struct { + Filename string + URL string + Checksum string // "sha256:" + RequiresPython string +} + +// parseSimpleIndex parses the HTML from a PEP 503 Simple Repository API response. 
+func parseSimpleIndex(body string, baseURL string) []wheelLink { + var links []wheelLink + + // Simple regex-based parsing of tags + linkRe := regexp.MustCompile(`]*href="([^"]*)"[^>]*>([^<]*)`) + requiresPythonRe := regexp.MustCompile(`data-requires-python="([^"]*)"`) + + for _, match := range linkRe.FindAllStringSubmatch(body, -1) { + href := match[1] + filename := strings.TrimSpace(match[2]) + + if !strings.HasSuffix(filename, ".whl") { + continue + } + + var checksum string + if hashIdx := strings.Index(href, "#sha256="); hashIdx != -1 { + checksum = "sha256:" + href[hashIdx+8:] + href = href[:hashIdx] + } + + // Resolve relative URLs + url := href + if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") { + url = strings.TrimSuffix(baseURL, "/") + "/" + strings.TrimPrefix(href, "/") + } + + var requiresPython string + // Check if there's a data-requires-python attribute in the full tag + tagStart := strings.LastIndex(body[:strings.Index(body, match[0])+1], "= 0 { + tagEnd := strings.Index(body[tagStart:], ">") + tagStart + tag := body[tagStart : tagEnd+1] + if rpMatch := requiresPythonRe.FindStringSubmatch(tag); rpMatch != nil { + requiresPython = strings.ReplaceAll(rpMatch[1], ">", ">") + requiresPython = strings.ReplaceAll(requiresPython, "<", "<") + requiresPython = strings.ReplaceAll(requiresPython, "&", "&") + } + } + + links = append(links, wheelLink{ + Filename: filename, + URL: url, + Checksum: checksum, + RequiresPython: requiresPython, + }) + } + + return links +} + +// resolvePackages resolves package specs to specific wheel URLs using PEP 503. 
+func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture) ([]ecosystem.ResolvedPackage, error) { + log := clog.FromContext(ctx) + + if len(indexes) == 0 { + indexes = []string{defaultIndex} + } + + var resolved []ecosystem.ResolvedPackage + seen := map[string]bool{} + + for _, spec := range specs { + if seen[normalizeName(spec.Name)] { + continue + } + + pkg, err := resolveOne(ctx, spec, indexes, pythonVersion, arch) + if err != nil { + return nil, fmt.Errorf("resolving %s: %w", spec.Name, err) + } + seen[normalizeName(spec.Name)] = true + resolved = append(resolved, pkg) + log.Debugf("resolved %s==%s from %s", pkg.Name, pkg.Version, pkg.URL) + } + + return resolved, nil +} + +// resolveOne resolves a single package spec to a wheel URL. +func resolveOne(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, error) { + name := normalizeName(spec.Name) + + for _, index := range indexes { + indexURL := strings.TrimSuffix(index, "/") + "/" + name + "/" + + body, err := fetchSimpleIndex(ctx, indexURL) + if err != nil { + clog.FromContext(ctx).Debugf("index %s: %v", indexURL, err) + continue + } + + links := parseSimpleIndex(body, indexURL) + if len(links) == 0 { + continue + } + + best, err := selectBestWheel(links, spec, pythonVersion, arch) + if err != nil { + continue + } + + return ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: spec.Name, + Version: best.version, + URL: best.url, + Checksum: best.checksum, + }, nil + } + + return ecosystem.ResolvedPackage{}, fmt.Errorf("package %s not found in any index", spec.Name) +} + +type selectedWheel struct { + version string + url string + checksum string +} + +// selectBestWheel selects the best compatible wheel from a list of links. 
+func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, arch types.Architecture) (selectedWheel, error) { + var bestLink *wheelLink + var bestParts wheelFileParts + bestScore := -1 + + for i, link := range links { + parts, err := parseWheelFilename(link.Filename) + if err != nil { + continue + } + + if !isCompatibleWheel(parts, pythonVersion, arch) { + continue + } + + if !matchesVersionSpec(parts.Version, spec) { + continue + } + + score := wheelScore(parts, pythonVersion, arch) + if bestLink == nil || compareVersions(parts.Version, bestParts.Version) > 0 || (compareVersions(parts.Version, bestParts.Version) == 0 && score > bestScore) { + bestLink = &links[i] + bestParts = parts + bestScore = score + } + } + + if bestLink == nil { + return selectedWheel{}, fmt.Errorf("no compatible wheel found") + } + + return selectedWheel{ + version: bestParts.Version, + url: bestLink.URL, + checksum: bestLink.Checksum, + }, nil +} + +// matchesVersionSpec checks if a version matches the given spec. 
+func matchesVersionSpec(version string, spec packageSpec) bool { + if spec.Operator == "" { + return true + } + switch spec.Operator { + case "==": + return version == spec.Version + case "!=": + return version != spec.Version + case ">=": + return compareVersions(version, spec.Version) >= 0 + case "<=": + return compareVersions(version, spec.Version) <= 0 + case ">": + return compareVersions(version, spec.Version) > 0 + case "<": + return compareVersions(version, spec.Version) < 0 + case "~=": + // Compatible release: ~=X.Y is equivalent to >=X.Y, ==X.* + if compareVersions(version, spec.Version) < 0 { + return false + } + specParts := strings.Split(spec.Version, ".") + verParts := strings.Split(version, ".") + if len(specParts) < 2 || len(verParts) < 2 { + return false + } + // Major parts must match up to second-to-last + for i := 0; i < len(specParts)-1 && i < len(verParts); i++ { + if verParts[i] != specParts[i] { + return false + } + } + return true + } + return false +} + +// compareVersions performs a simple version comparison. +// Returns -1, 0, or 1. 
// compareVersions orders two version strings and returns -1, 0, or 1.
//
// When both strings have the PEP 440 shape (release numbers with optional
// pre-, post-, dev- and local segments) they are ordered the PEP 440 way:
// release numbers first (zero-padded, so "1.0" equals "1.0.0"), then
// dev-only < alpha < beta < rc < final < post. This is what keeps "1.0rc1"
// below "1.0" and "1.0rc9" below "1.0rc10". If either string does not have
// that shape, the component-wise fallback in compareVersionsLegacy is used.
func compareVersions(a, b string) int {
	ka, okA := parsePEP440(a)
	kb, okB := parsePEP440(b)
	if !okA || !okB {
		return compareVersionsLegacy(a, b)
	}

	if c := cmpInt(ka.epoch, kb.epoch); c != 0 {
		return c
	}

	n := len(ka.release)
	if len(kb.release) > n {
		n = len(kb.release)
	}
	for i := 0; i < n; i++ {
		var x, y int // missing trailing components count as 0
		if i < len(ka.release) {
			x = ka.release[i]
		}
		if i < len(kb.release) {
			y = kb.release[i]
		}
		if c := cmpInt(x, y); c != 0 {
			return c
		}
	}

	for _, pair := range [][2]int{
		{ka.prePhase, kb.prePhase},
		{ka.preNum, kb.preNum},
		{ka.post, kb.post},
		{ka.dev, kb.dev},
	} {
		if c := cmpInt(pair[0], pair[1]); c != 0 {
			return c
		}
	}

	// A version with a local label sorts after the same version without one.
	return strings.Compare(ka.local, kb.local)
}

// Sentinel values used in pep440Key so that absent segments sort correctly
// with plain integer comparison.
const (
	preDevOnly = -1                  // dev release with no pre/post segment: below every pre-release
	preFinal   = 3                   // no pre-release segment: above alpha(0), beta(1), rc(2)
	postAbsent = -1                  // no post segment: below post0
	devAbsent  = int(^uint(0) >> 1)  // no dev segment: above every devN
)

// pep440Re recognises a lower-cased PEP 440 version. Groups: 1 epoch,
// 2 release, 3 pre label, 4 pre number, 5 implicit post number ("-N"),
// 6 post label, 7 post number, 8 dev label, 9 dev number, 10 local label.
var pep440Re = regexp.MustCompile(`^v?(?:(\d+)!)?(\d+(?:\.\d+)*)` +
	`(?:[-_.]?(alpha|a|beta|b|preview|pre|rc|c)[-_.]?(\d*))?` +
	`(?:-(\d+)|[-_.]?(post|rev|r)[-_.]?(\d*))?` +
	`(?:[-_.]?(dev)[-_.]?(\d*))?` +
	`(?:\+([a-z0-9]+(?:[-_.][a-z0-9]+)*))?$`)

// pep440Key is a parsed version in a form that compares field by field.
type pep440Key struct {
	epoch    int
	release  []int
	prePhase int // 0 alpha, 1 beta, 2 rc, or preDevOnly / preFinal
	preNum   int
	post     int // post number, or postAbsent
	dev      int // dev number, or devAbsent
	local    string
}

// parsePEP440 parses v (case-insensitively, ignoring surrounding space) and
// reports whether it has the PEP 440 shape.
func parsePEP440(v string) (pep440Key, bool) {
	m := pep440Re.FindStringSubmatch(strings.ToLower(strings.TrimSpace(v)))
	if m == nil {
		return pep440Key{}, false
	}

	k := pep440Key{epoch: parseVersionPart(m[1]), post: postAbsent, dev: devAbsent, local: m[10]}
	for _, part := range strings.Split(m[2], ".") {
		k.release = append(k.release, parseVersionPart(part))
	}

	hasPre := m[3] != ""
	hasPost := m[5] != "" || m[6] != ""
	hasDev := m[8] != ""

	switch {
	case hasPre:
		switch m[3] {
		case "a", "alpha":
			k.prePhase = 0
		case "b", "beta":
			k.prePhase = 1
		default: // c, rc, pre, preview
			k.prePhase = 2
		}
		k.preNum = parseVersionPart(m[4])
	case hasDev && !hasPost:
		k.prePhase = preDevOnly
	default:
		k.prePhase = preFinal
	}
	if hasPost {
		k.post = parseVersionPart(m[5] + m[7]) // at most one of the two is non-empty
	}
	if hasDev {
		k.dev = parseVersionPart(m[9])
	}
	return k, true
}

// compareVersionsLegacy compares dot-separated components left to right.
// Missing components count as "0". Components that differ are compared by
// their leading integer, and by plain string order when the integers agree.
func compareVersionsLegacy(a, b string) int {
	aParts := strings.Split(a, ".")
	bParts := strings.Split(b, ".")

	maxLen := len(aParts)
	if len(bParts) > maxLen {
		maxLen = len(bParts)
	}

	for i := 0; i < maxLen; i++ {
		aVal, bVal := "0", "0"
		if i < len(aParts) {
			aVal = aParts[i]
		}
		if i < len(bParts) {
			bVal = bParts[i]
		}
		if aVal == bVal {
			continue
		}
		if c := cmpInt(parseVersionPart(aVal), parseVersionPart(bVal)); c != 0 {
			return c
		}
		if aVal < bVal {
			return -1
		}
		return 1
	}
	return 0
}

// cmpInt returns -1, 0, or 1 according to whether x is less than, equal to,
// or greater than y.
func cmpInt(x, y int) int {
	switch {
	case x < y:
		return -1
	case x > y:
		return 1
	}
	return 0
}

// parseVersionPart returns the integer formed by the leading ASCII digits of
// s, or 0 when s does not start with a digit.
func parseVersionPart(s string) int {
	n := 0
	for _, c := range s {
		if c < '0' || c > '9' {
			break
		}
		n = n*10 + int(c-'0')
	}
	return n
}

// fetchSimpleIndex fetches the PEP 503 Simple API page for a package.
//
// It issues a GET for url with "Accept: text/html" on http.DefaultClient,
// bound to ctx, and returns the whole response body as a string. Any status
// other than 200 OK is reported as an error that names the status and the URL.
func fetchSimpleIndex(ctx context.Context, url string) (string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return "", err
	}
	req.Header.Set("Accept", "text/html")

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("HTTP %d for %s", resp.StatusCode, url)
	}

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		return "", err
	}

	return string(body), nil
}
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pip + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "chainguard.dev/apko/pkg/build/types" +) + +func TestParsePackageSpec(t *testing.T) { + tests := []struct { + input string + name string + op string + version string + markers string + }{ + {"flask==3.0.0", "flask", "==", "3.0.0", ""}, + {"requests>=2.31.0", "requests", ">=", "2.31.0", ""}, + {"numpy", "numpy", "", "", ""}, + {"foo~=1.4.2", "foo", "~=", "1.4.2", ""}, + {"bar!=2.0", "bar", "!=", "2.0", ""}, + {`baz>=1.0; python_version>="3.8"`, "baz", ">=", "1.0", `python_version>="3.8"`}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + spec := parsePackageSpec(tt.input) + if spec.Name != tt.name { + t.Errorf("Name = %q, want %q", spec.Name, tt.name) + } + if spec.Operator != tt.op { + t.Errorf("Operator = %q, want %q", spec.Operator, tt.op) + } + if spec.Version != tt.version { + t.Errorf("Version = %q, want %q", spec.Version, tt.version) + } + if spec.Markers != tt.markers { + t.Errorf("Markers = %q, want %q", spec.Markers, tt.markers) + } + }) + } +} + +func TestNormalizeName(t *testing.T) { + tests := []struct { + input string + want string + }{ + {"Flask", "flask"}, + {"my-package", "my-package"}, + {"my_package", "my-package"}, + {"My.Package", "my-package"}, + {"My---Package", "my-package"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + got := normalizeName(tt.input) + if got != tt.want { + t.Errorf("normalizeName(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func 
TestParseSimpleIndex(t *testing.T) { + body := ` + +Flask-3.0.0-py3-none-any.whl +Flask-2.3.0-py3-none-any.whl +Flask-3.0.0.tar.gz + +` + links := parseSimpleIndex(body, "https://pypi.org/simple/flask/") + if len(links) != 2 { + t.Fatalf("expected 2 wheel links, got %d", len(links)) + } + + if links[0].Filename != "Flask-3.0.0-py3-none-any.whl" { + t.Errorf("links[0].Filename = %q", links[0].Filename) + } + if links[0].Checksum != "sha256:abc123" { + t.Errorf("links[0].Checksum = %q", links[0].Checksum) + } +} + +func TestCompareVersions(t *testing.T) { + tests := []struct { + a, b string + want int + }{ + {"1.0.0", "1.0.0", 0}, + {"2.0.0", "1.0.0", 1}, + {"1.0.0", "2.0.0", -1}, + {"1.10.0", "1.9.0", 1}, + {"1.0", "1.0.0", 0}, + } + + for _, tt := range tests { + t.Run(tt.a+"_vs_"+tt.b, func(t *testing.T) { + got := compareVersions(tt.a, tt.b) + if got != tt.want { + t.Errorf("compareVersions(%q, %q) = %d, want %d", tt.a, tt.b, got, tt.want) + } + }) + } +} + +func TestMatchesVersionSpec(t *testing.T) { + tests := []struct { + version string + spec packageSpec + want bool + }{ + {"3.0.0", packageSpec{Operator: "==", Version: "3.0.0"}, true}, + {"3.0.1", packageSpec{Operator: "==", Version: "3.0.0"}, false}, + {"3.0.0", packageSpec{Operator: ">=", Version: "2.0.0"}, true}, + {"1.0.0", packageSpec{Operator: ">=", Version: "2.0.0"}, false}, + {"3.0.0", packageSpec{Operator: "", Version: ""}, true}, + {"1.4.3", packageSpec{Operator: "~=", Version: "1.4.2"}, true}, + {"2.0.0", packageSpec{Operator: "~=", Version: "1.4.2"}, false}, + } + + for _, tt := range tests { + t.Run(tt.version+"_"+tt.spec.Operator+tt.spec.Version, func(t *testing.T) { + got := matchesVersionSpec(tt.version, tt.spec) + if got != tt.want { + t.Errorf("matchesVersionSpec(%q, %v) = %v, want %v", tt.version, tt.spec, got, tt.want) + } + }) + } +} + +func TestResolveWithMockServer(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/simple/flask/", func(w http.ResponseWriter, r *http.Request) 
{ + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(` +Flask-3.0.0-py3-none-any.whl +Flask-2.3.0-py3-none-any.whl +`)) + }) + + server := httptest.NewServer(mux) + defer server.Close() + + specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + if len(resolved) != 1 { + t.Fatalf("expected 1 resolved package, got %d", len(resolved)) + } + + if resolved[0].Name != "flask" { + t.Errorf("Name = %q, want %q", resolved[0].Name, "flask") + } + if resolved[0].Version != "3.0.0" { + t.Errorf("Version = %q, want %q", resolved[0].Version, "3.0.0") + } + if resolved[0].Checksum != "sha256:abc123" { + t.Errorf("Checksum = %q, want %q", resolved[0].Checksum, "sha256:abc123") + } +} diff --git a/pkg/ecosystem/pip/wheel.go b/pkg/ecosystem/pip/wheel.go new file mode 100644 index 000000000..32d396da8 --- /dev/null +++ b/pkg/ecosystem/pip/wheel.go @@ -0,0 +1,112 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package pip + +import ( + "archive/zip" + "bytes" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "path/filepath" + "strings" + + apkfs "chainguard.dev/apko/pkg/apk/fs" +) + +// extractWheel extracts a wheel (.whl) file into the filesystem at the given +// site-packages path. 
A .whl file is a ZIP archive. +func extractWheel(fsys apkfs.FullFS, wheelData []byte, sitePackagesPath string) error { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return fmt.Errorf("opening wheel as zip: %w", err) + } + + for _, f := range reader.File { + targetPath := filepath.Join(sitePackagesPath, f.Name) + + if f.FileInfo().IsDir() { + if err := fsys.MkdirAll(targetPath, 0755); err != nil { + return fmt.Errorf("creating directory %s: %w", targetPath, err) + } + continue + } + + // Ensure parent directory exists + dir := filepath.Dir(targetPath) + if err := fsys.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("creating parent directory %s: %w", dir, err) + } + + rc, err := f.Open() + if err != nil { + return fmt.Errorf("opening %s in wheel: %w", f.Name, err) + } + + data, err := io.ReadAll(rc) + rc.Close() + if err != nil { + return fmt.Errorf("reading %s from wheel: %w", f.Name, err) + } + + if err := fsys.WriteFile(targetPath, data, 0644); err != nil { + return fmt.Errorf("writing %s: %w", targetPath, err) + } + } + + return nil +} + +// writeInstallerFile writes the PEP 376 INSTALLER file into the .dist-info directory. +func writeInstallerFile(fsys apkfs.FullFS, sitePackagesPath string, wheelData []byte) error { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return err + } + + // Find the .dist-info directory + for _, f := range reader.File { + if strings.HasSuffix(f.Name, ".dist-info/METADATA") { + distInfoDir := filepath.Dir(f.Name) + installerPath := filepath.Join(sitePackagesPath, distInfoDir, "INSTALLER") + return fsys.WriteFile(installerPath, []byte("apko\n"), 0644) + } + } + + return nil +} + +// verifyChecksum verifies the SHA256 checksum of data against the expected value. 
// verifyChecksum checks data against expected, which must have the form
// "sha256:<hex digest>".
//
// An empty expected value means "no checksum available" and is accepted
// without hashing. Any other prefix is reported as an unsupported format. The
// hex digest is compared case-insensitively, because upper- and lower-case
// hex spell the same bytes and index pages are free to use either.
func verifyChecksum(data []byte, expected string) error {
	if expected == "" {
		return nil
	}

	const prefix = "sha256:"
	if !strings.HasPrefix(expected, prefix) {
		return fmt.Errorf("unsupported checksum format: %s", expected)
	}
	expectedHex := expected[len(prefix):]

	sum := sha256.Sum256(data)
	actualHex := hex.EncodeToString(sum[:])

	if !strings.EqualFold(actualHex, expectedHex) {
		return fmt.Errorf("checksum mismatch: expected %s, got %s", expectedHex, actualHex)
	}

	return nil
}
+ +package pip + +import ( + "archive/zip" + "bytes" + "crypto/sha256" + "encoding/hex" + "testing" + + apkfs "chainguard.dev/apko/pkg/apk/fs" +) + +func createTestWheel(t *testing.T, files map[string]string) []byte { + t.Helper() + var buf bytes.Buffer + w := zip.NewWriter(&buf) + for name, content := range files { + f, err := w.Create(name) + if err != nil { + t.Fatalf("creating file in zip: %v", err) + } + if _, err := f.Write([]byte(content)); err != nil { + t.Fatalf("writing file in zip: %v", err) + } + } + if err := w.Close(); err != nil { + t.Fatalf("closing zip: %v", err) + } + return buf.Bytes() +} + +func TestExtractWheel(t *testing.T) { + wheelData := createTestWheel(t, map[string]string{ + "mypackage/__init__.py": "# init", + "mypackage/module.py": "def hello(): pass", + "mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n", + "mypackage-1.0.0.dist-info/RECORD": "", + }) + + fs := apkfs.NewMemFS() + if err := fs.MkdirAll("usr/lib/python3.12/site-packages", 0755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + + err := extractWheel(fs, wheelData, "usr/lib/python3.12/site-packages") + if err != nil { + t.Fatalf("extractWheel() error: %v", err) + } + + // Check that files were extracted + data, err := fs.ReadFile("usr/lib/python3.12/site-packages/mypackage/__init__.py") + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(data) != "# init" { + t.Errorf("content = %q, want %q", string(data), "# init") + } + + data, err = fs.ReadFile("usr/lib/python3.12/site-packages/mypackage/module.py") + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(data) != "def hello(): pass" { + t.Errorf("content = %q, want %q", string(data), "def hello(): pass") + } +} + +func TestWriteInstallerFile(t *testing.T) { + wheelData := createTestWheel(t, map[string]string{ + "mypackage/__init__.py": "# init", + "mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n", + }) + + fs := apkfs.NewMemFS() + if err := 
fs.MkdirAll("usr/lib/python3.12/site-packages/mypackage-1.0.0.dist-info", 0755); err != nil { + t.Fatalf("MkdirAll: %v", err) + } + + err := writeInstallerFile(fs, "usr/lib/python3.12/site-packages", wheelData) + if err != nil { + t.Fatalf("writeInstallerFile() error: %v", err) + } + + data, err := fs.ReadFile("usr/lib/python3.12/site-packages/mypackage-1.0.0.dist-info/INSTALLER") + if err != nil { + t.Fatalf("ReadFile: %v", err) + } + if string(data) != "apko\n" { + t.Errorf("INSTALLER content = %q, want %q", string(data), "apko\n") + } +} + +func TestVerifyChecksum(t *testing.T) { + data := []byte("hello world") + h := sha256.Sum256(data) + validChecksum := "sha256:" + hex.EncodeToString(h[:]) + + if err := verifyChecksum(data, validChecksum); err != nil { + t.Errorf("verifyChecksum() with valid checksum: %v", err) + } + + if err := verifyChecksum(data, "sha256:0000000000000000000000000000000000000000000000000000000000000000"); err == nil { + t.Error("verifyChecksum() with invalid checksum should return error") + } + + if err := verifyChecksum(data, ""); err != nil { + t.Error("verifyChecksum() with empty checksum should return nil") + } +} diff --git a/pkg/lock/lock.go b/pkg/lock/lock.go index aceb4e395..870661f8c 100644 --- a/pkg/lock/lock.go +++ b/pkg/lock/lock.go @@ -24,12 +24,23 @@ type Config struct { } type LockContents struct { - Keyrings []LockKeyring `json:"keyring"` - BuildRepositories []LockRepo `json:"build_repositories"` - RuntimeOnlyRepositories []LockRepo `json:"runtime_repositories"` - Repositories []LockRepo `json:"repositories"` + Keyrings []LockKeyring `json:"keyring"` + BuildRepositories []LockRepo `json:"build_repositories"` + RuntimeOnlyRepositories []LockRepo `json:"runtime_repositories"` + Repositories []LockRepo `json:"repositories"` // Packages in order of installation -> for a single architecture. 
- Packages []LockPkg `json:"packages"` + Packages []LockPkg `json:"packages"` + EcosystemPackages []LockEcosystemPkg `json:"ecosystem_packages,omitempty"` +} + +// LockEcosystemPkg represents a locked non-APK ecosystem package. +type LockEcosystemPkg struct { + Ecosystem string `json:"ecosystem"` + Name string `json:"name"` + Version string `json:"version"` + URL string `json:"url"` + Checksum string `json:"checksum"` + Architecture string `json:"architecture"` } type LockPkg struct { From 378c2c4f011b63ff29a60a382378cdb5aa472530 Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Fri, 3 Apr 2026 14:04:36 -0600 Subject: [PATCH 02/12] feat: add transitive dependency resolution via PyPI JSON API Use the PyPI JSON API (pypi.org/pypi/{name}/{version}/json) to resolve packages and discover transitive dependencies, instead of downloading entire wheels just to read their METADATA files. The JSON API returns clean requires_dist lists and wheel URLs with checksums in a single request. Falls back to the PEP 503 Simple API for non-PyPI indexes (private registries), though without transitive resolution in that case. Also adds environment marker evaluation (extra, os_name, sys_platform, etc.) to correctly filter conditional dependencies, and pre-release filtering to avoid resolving alpha/beta/rc versions unless pinned. Tested with torch==2.6.0 which correctly resolves all 24 transitive dependencies automatically. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/ecosystem/pip/resolve.go | 393 +++++++++++++++++++++++++----- pkg/ecosystem/pip/resolve_test.go | 206 +++++++++++++++- pkg/ecosystem/pip/wheel.go | 238 ++++++++++++++++++ pkg/ecosystem/pip/wheel_test.go | 127 ++++++++++ 4 files changed, 893 insertions(+), 71 deletions(-) diff --git a/pkg/ecosystem/pip/resolve.go b/pkg/ecosystem/pip/resolve.go index 1c08c272c..af88a79ad 100644 --- a/pkg/ecosystem/pip/resolve.go +++ b/pkg/ecosystem/pip/resolve.go @@ -16,9 +16,11 @@ package pip import ( "context" + "encoding/json" "fmt" "io" "net/http" + neturl "net/url" "regexp" "strings" @@ -29,14 +31,25 @@ import ( ) const defaultIndex = "https://pypi.org/simple/" +const pypiJSONBaseDefault = "https://pypi.org/pypi/" + +// pypiJSONBaseOverride allows tests to redirect the JSON API to a mock server. +var pypiJSONBaseOverride string + +func pypiJSONBase() string { + if pypiJSONBaseOverride != "" { + return pypiJSONBaseOverride + } + return pypiJSONBaseDefault +} // packageSpec represents a parsed package requirement (e.g., "flask==3.0.0"). type packageSpec struct { - Name string - Operator string // "==", ">=", "<=", "!=", "~=", "" - Version string - Extras []string - Markers string + Name string + Operator string // "==", ">=", "<=", "!=", "~=", "" + Version string + Extras []string + Markers string } // parsePackageSpec parses a PEP 508-style requirement string. 
@@ -63,15 +76,45 @@ func parsePackageSpec(spec string) packageSpec { spec = strings.TrimSpace(spec) - for _, op := range []string{"~=", "==", "!=", ">=", "<=", ">", "<"} { - if idx := strings.Index(spec, op); idx != -1 { - ps.Name = strings.TrimSpace(spec[:idx]) - ps.Operator = op - ps.Version = strings.TrimSpace(spec[idx+len(op):]) + // Handle parenthesized version constraints: "package (>=1.0)" + if lpIdx := strings.Index(spec, "("); lpIdx != -1 { + if rpIdx := strings.LastIndex(spec, ")"); rpIdx > lpIdx { + ps.Name = strings.TrimSpace(spec[:lpIdx]) + inner := strings.TrimSpace(spec[lpIdx+1 : rpIdx]) + parts := strings.SplitN(inner, ",", 2) + constraint := strings.TrimSpace(parts[0]) + for _, op := range []string{"~=", "==", "!=", ">=", "<=", ">", "<"} { + if strings.HasPrefix(constraint, op) { + ps.Operator = op + ps.Version = strings.TrimSpace(constraint[len(op):]) + return ps + } + } return ps } } + // Find the first operator by position in the string + bestIdx := -1 + bestOp := "" + for _, op := range []string{"~=", "==", "!=", ">=", "<=", ">", "<"} { + idx := strings.Index(spec, op) + if idx != -1 && (bestIdx == -1 || idx < bestIdx) { + bestIdx = idx + bestOp = op + } + } + if bestIdx != -1 { + ps.Name = strings.TrimSpace(spec[:bestIdx]) + ps.Operator = bestOp + version := strings.TrimSpace(spec[bestIdx+len(bestOp):]) + if commaIdx := strings.Index(version, ","); commaIdx != -1 { + version = version[:commaIdx] + } + ps.Version = version + return ps + } + ps.Name = spec return ps } @@ -81,11 +124,253 @@ func normalizeName(name string) string { return strings.ToLower(regexp.MustCompile(`[-_.]+`).ReplaceAllString(name, "-")) } +// --- PyPI JSON API types --- + +// pypiPackageJSON is the response from https://pypi.org/pypi/{name}/{version}/json +type pypiPackageJSON struct { + Info pypiInfo `json:"info"` + URLs []pypiURL `json:"urls"` +} + +type pypiInfo struct { + Name string `json:"name"` + Version string `json:"version"` + RequiresDist []string 
`json:"requires_dist"` +} + +type pypiURL struct { + Filename string `json:"filename"` + URL string `json:"url"` + PackageType string `json:"packagetype"` + Digests pypiDigests `json:"digests"` +} + +type pypiDigests struct { + SHA256 string `json:"sha256"` +} + +// pypiVersionsJSON is a minimal parse of https://pypi.org/pypi/{name}/json +// to list available versions. +type pypiVersionsJSON struct { + Releases map[string][]pypiURL `json:"releases"` +} + +// --- Resolution --- + +// resolvePackages resolves package specs to specific wheel URLs, +// including transitive dependencies discovered via the PyPI JSON API. +func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture) ([]ecosystem.ResolvedPackage, error) { + log := clog.FromContext(ctx) + + if len(indexes) == 0 { + indexes = []string{defaultIndex} + } + + var resolved []ecosystem.ResolvedPackage + seen := map[string]bool{} + + // BFS queue + queue := make([]packageSpec, len(specs)) + copy(queue, specs) + + for len(queue) > 0 { + spec := queue[0] + queue = queue[1:] + + name := normalizeName(spec.Name) + if seen[name] { + continue + } + + pkg, deps, err := resolveOneWithDeps(ctx, spec, indexes, pythonVersion, arch) + if err != nil { + return nil, fmt.Errorf("resolving %s: %w", spec.Name, err) + } + seen[name] = true + resolved = append(resolved, pkg) + log.Debugf("resolved %s==%s from %s", pkg.Name, pkg.Version, pkg.URL) + + for _, dep := range deps { + if !seen[normalizeName(dep.Name)] { + log.Debugf("discovered transitive dependency: %s (from %s)", dep.Name, pkg.Name) + queue = append(queue, dep) + } + } + } + + return resolved, nil +} + +// resolveOneWithDeps resolves a package and returns both the resolved package +// and its transitive dependencies. It tries the PyPI JSON API first (which +// gives us clean metadata), falling back to the Simple API for non-PyPI indexes. 
+func resolveOneWithDeps(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { + // Try PyPI JSON API first — it gives us metadata + wheel URLs in one call + if usesDefaultPyPI(indexes) { + pkg, deps, err := resolveViaJSON(ctx, spec, pythonVersion, arch) + if err == nil { + return pkg, deps, nil + } + clog.FromContext(ctx).Debugf("JSON API failed for %s, falling back to Simple API: %v", spec.Name, err) + } + + // Fall back to Simple API + pkg, err := resolveViaSimple(ctx, spec, indexes, pythonVersion, arch) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + return pkg, nil, nil +} + +func usesDefaultPyPI(indexes []string) bool { + if pypiJSONBaseOverride != "" { + return true + } + for _, idx := range indexes { + if strings.Contains(idx, "pypi.org") { + return true + } + } + return false +} + +// resolveViaJSON resolves a package using the PyPI JSON API. +// Returns the resolved package and its parsed Requires-Dist as deps. 
+func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { + name := normalizeName(spec.Name) + + // If we have an exact version, fetch that directly + if spec.Operator == "==" { + return resolveJSONVersion(ctx, name, spec.Name, spec.Version, pythonVersion, arch) + } + + // Otherwise, list all versions and pick the best + versionsURL := pypiJSONBase() + name + "/json" + data, err := httpGet(ctx, versionsURL) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + var versionsResp pypiVersionsJSON + if err := json.Unmarshal(data, &versionsResp); err != nil { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("parsing PyPI versions JSON: %w", err) + } + + // Find the best matching version + bestVersion := "" + for version := range versionsResp.Releases { + if !matchesVersionSpec(version, spec) { + continue + } + // Skip pre-releases unless explicitly requested + if isPreRelease(version) && spec.Operator != "==" { + continue + } + if bestVersion == "" || compareVersions(version, bestVersion) > 0 { + bestVersion = version + } + } + if bestVersion == "" { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("no matching version for %s%s%s", spec.Name, spec.Operator, spec.Version) + } + + return resolveJSONVersion(ctx, name, spec.Name, bestVersion, pythonVersion, arch) +} + +// resolveJSONVersion fetches a specific version from the PyPI JSON API. 
+func resolveJSONVersion(ctx context.Context, normalizedName, originalName, version, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { + versionURL := pypiJSONBase() + normalizedName + "/" + version + "/json" + data, err := httpGet(ctx, versionURL) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + var pkgResp pypiPackageJSON + if err := json.Unmarshal(data, &pkgResp); err != nil { + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("parsing PyPI JSON: %w", err) + } + + // Find the best wheel from the URLs + wheelURL, checksum, err := selectBestWheelFromJSON(pkgResp.URLs, pythonVersion, arch) + if err != nil { + return ecosystem.ResolvedPackage{}, nil, err + } + + // Parse dependencies from requires_dist + var deps []packageSpec + for _, req := range pkgResp.Info.RequiresDist { + dep := parsePackageSpec(req) + if dep.Markers != "" && !evaluateMarkers(dep.Markers, nil) { + continue + } + deps = append(deps, dep) + } + + return ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: originalName, + Version: pkgResp.Info.Version, + URL: wheelURL, + Checksum: checksum, + }, deps, nil +} + +// selectBestWheelFromJSON picks the best compatible wheel from PyPI JSON API URLs. 
+func selectBestWheelFromJSON(urls []pypiURL, pythonVersion string, arch types.Architecture) (string, string, error) { + var bestURL *pypiURL + var bestParts wheelFileParts + bestScore := -1 + + for i, u := range urls { + if u.PackageType != "bdist_wheel" { + continue + } + parts, err := parseWheelFilename(u.Filename) + if err != nil { + continue + } + if !isCompatibleWheel(parts, pythonVersion, arch) { + continue + } + + score := wheelScore(parts, pythonVersion, arch) + if bestURL == nil || score > bestScore { + bestURL = &urls[i] + bestParts = parts + _ = bestParts // used for future scoring + bestScore = score + } + } + + if bestURL == nil { + return "", "", fmt.Errorf("no compatible wheel found") + } + + checksum := "" + if bestURL.Digests.SHA256 != "" { + checksum = "sha256:" + bestURL.Digests.SHA256 + } + return bestURL.URL, checksum, nil +} + +// isPreRelease returns true if a version string looks like a pre-release. +func isPreRelease(version string) bool { + v := strings.ToLower(version) + for _, tag := range []string{"a", "b", "rc", "alpha", "beta", "dev", "pre"} { + if strings.Contains(v, tag) { + return true + } + } + return false +} + +// --- Simple API fallback (for non-PyPI indexes) --- + // wheelLink represents a parsed link from a PEP 503 Simple API response. 
type wheelLink struct { - Filename string - URL string - Checksum string // "sha256:" + Filename string + URL string + Checksum string // "sha256:" RequiresPython string } @@ -93,7 +378,6 @@ type wheelLink struct { func parseSimpleIndex(body string, baseURL string) []wheelLink { var links []wheelLink - // Simple regex-based parsing of tags linkRe := regexp.MustCompile(`]*href="([^"]*)"[^>]*>([^<]*)`) requiresPythonRe := regexp.MustCompile(`data-requires-python="([^"]*)"`) @@ -111,14 +395,16 @@ func parseSimpleIndex(body string, baseURL string) []wheelLink { href = href[:hashIdx] } - // Resolve relative URLs - url := href + linkURL := href if !strings.HasPrefix(href, "http://") && !strings.HasPrefix(href, "https://") { - url = strings.TrimSuffix(baseURL, "/") + "/" + strings.TrimPrefix(href, "/") + if base, err := neturl.Parse(baseURL); err == nil { + if ref, err := neturl.Parse(href); err == nil { + linkURL = base.ResolveReference(ref).String() + } + } } var requiresPython string - // Check if there's a data-requires-python attribute in the full tag tagStart := strings.LastIndex(body[:strings.Index(body, match[0])+1], "= 0 { tagEnd := strings.Index(body[tagStart:], ">") + tagStart @@ -132,7 +418,7 @@ func parseSimpleIndex(body string, baseURL string) []wheelLink { links = append(links, wheelLink{ Filename: filename, - URL: url, + URL: linkURL, Checksum: checksum, RequiresPython: requiresPython, }) @@ -141,36 +427,9 @@ func parseSimpleIndex(body string, baseURL string) []wheelLink { return links } -// resolvePackages resolves package specs to specific wheel URLs using PEP 503. 
-func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture) ([]ecosystem.ResolvedPackage, error) { - log := clog.FromContext(ctx) - - if len(indexes) == 0 { - indexes = []string{defaultIndex} - } - - var resolved []ecosystem.ResolvedPackage - seen := map[string]bool{} - - for _, spec := range specs { - if seen[normalizeName(spec.Name)] { - continue - } - - pkg, err := resolveOne(ctx, spec, indexes, pythonVersion, arch) - if err != nil { - return nil, fmt.Errorf("resolving %s: %w", spec.Name, err) - } - seen[normalizeName(spec.Name)] = true - resolved = append(resolved, pkg) - log.Debugf("resolved %s==%s from %s", pkg.Name, pkg.Version, pkg.URL) - } - - return resolved, nil -} - -// resolveOne resolves a single package spec to a wheel URL. -func resolveOne(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, error) { +// resolveViaSimple resolves a package using the PEP 503 Simple API. +// Does not return transitive deps (no metadata available without downloading). +func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, error) { name := normalizeName(spec.Name) for _, index := range indexes { @@ -210,7 +469,7 @@ type selectedWheel struct { checksum string } -// selectBestWheel selects the best compatible wheel from a list of links. +// selectBestWheel selects the best compatible wheel from Simple API links. 
func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, arch types.Architecture) (selectedWheel, error) { var bestLink *wheelLink var bestParts wheelFileParts @@ -221,11 +480,9 @@ func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, if err != nil { continue } - if !isCompatibleWheel(parts, pythonVersion, arch) { continue } - if !matchesVersionSpec(parts.Version, spec) { continue } @@ -249,7 +506,8 @@ func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, }, nil } -// matchesVersionSpec checks if a version matches the given spec. +// --- Version comparison --- + func matchesVersionSpec(version string, spec packageSpec) bool { if spec.Operator == "" { return true @@ -268,7 +526,6 @@ func matchesVersionSpec(version string, spec packageSpec) bool { case "<": return compareVersions(version, spec.Version) < 0 case "~=": - // Compatible release: ~=X.Y is equivalent to >=X.Y, ==X.* if compareVersions(version, spec.Version) < 0 { return false } @@ -277,7 +534,6 @@ func matchesVersionSpec(version string, spec packageSpec) bool { if len(specParts) < 2 || len(verParts) < 2 { return false } - // Major parts must match up to second-to-last for i := 0; i < len(specParts)-1 && i < len(verParts); i++ { if verParts[i] != specParts[i] { return false @@ -288,8 +544,6 @@ func matchesVersionSpec(version string, spec packageSpec) bool { return false } -// compareVersions performs a simple version comparison. -// Returns -1, 0, or 1. 
func compareVersions(a, b string) int { aParts := strings.Split(a, ".") bParts := strings.Split(b, ".") @@ -314,7 +568,6 @@ func compareVersions(a, b string) int { if aVal == bVal { continue } - // Try numeric comparison aNum := parseVersionPart(aVal) bNum := parseVersionPart(bVal) if aNum != bNum { @@ -323,7 +576,6 @@ func compareVersions(a, b string) int { } return 1 } - // Fall back to string comparison if aVal < bVal { return -1 } @@ -344,7 +596,8 @@ func parseVersionPart(s string) int { return n } -// fetchSimpleIndex fetches the PEP 503 Simple API page for a package. +// --- HTTP helpers --- + func fetchSimpleIndex(ctx context.Context, url string) (string, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { @@ -366,6 +619,24 @@ func fetchSimpleIndex(ctx context.Context, url string) (string, error) { if err != nil { return "", err } - return string(body), nil } + +func httpGet(ctx context.Context, url string) ([]byte, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { + return nil, err + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("HTTP %d for %s", resp.StatusCode, url) + } + + return io.ReadAll(resp.Body) +} diff --git a/pkg/ecosystem/pip/resolve_test.go b/pkg/ecosystem/pip/resolve_test.go index ff227d45c..08ec3052b 100644 --- a/pkg/ecosystem/pip/resolve_test.go +++ b/pkg/ecosystem/pip/resolve_test.go @@ -16,6 +16,7 @@ package pip import ( "context" + "encoding/json" "net/http" "net/http/httptest" "testing" @@ -37,6 +38,9 @@ func TestParsePackageSpec(t *testing.T) { {"foo~=1.4.2", "foo", "~=", "1.4.2", ""}, {"bar!=2.0", "bar", "!=", "2.0", ""}, {`baz>=1.0; python_version>="3.8"`, "baz", ">=", "1.0", `python_version>="3.8"`}, + {"typing-extensions (>=4.10.0)", "typing-extensions", ">=", "4.10.0", ""}, + {"packaging (>=22.0,<25.0)", 
"packaging", ">=", "22.0", ""}, + {"mpmath<1.4,>=1.1.0", "mpmath", "<", "1.4", ""}, } for _, tt := range tests { @@ -148,19 +152,87 @@ func TestMatchesVersionSpec(t *testing.T) { } } -func TestResolveWithMockServer(t *testing.T) { +func TestIsPreRelease(t *testing.T) { + tests := []struct { + version string + want bool + }{ + {"3.0.0", false}, + {"3.0.0rc1", true}, + {"3.0.0a1", true}, + {"3.0.0b2", true}, + {"3.0.0.dev1", true}, + {"1.14.0rc2", true}, + } + for _, tt := range tests { + t.Run(tt.version, func(t *testing.T) { + got := isPreRelease(tt.version) + if got != tt.want { + t.Errorf("isPreRelease(%q) = %v, want %v", tt.version, got, tt.want) + } + }) + } +} + +// servePyPIJSON creates a mock server that serves PyPI JSON API responses. +func servePyPIJSON(t *testing.T, packages map[string]pypiPackageJSON) *httptest.Server { + t.Helper() mux := http.NewServeMux() - mux.HandleFunc("/simple/flask/", func(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "text/html") - w.Write([]byte(` -Flask-3.0.0-py3-none-any.whl -Flask-2.3.0-py3-none-any.whl -`)) - }) + for name, pkg := range packages { + name := normalizeName(name) + pkg := pkg - server := httptest.NewServer(mux) + // Serve /pypi/{name}/{version}/json + mux.HandleFunc("/pypi/"+name+"/"+pkg.Info.Version+"/json", func(w http.ResponseWriter, r *http.Request) { + json.NewEncoder(w).Encode(pkg) + }) + + // Serve /pypi/{name}/json (versions listing) + mux.HandleFunc("/pypi/"+name+"/json", func(w http.ResponseWriter, r *http.Request) { + resp := pypiVersionsJSON{ + Releases: map[string][]pypiURL{ + pkg.Info.Version: pkg.URLs, + }, + } + json.NewEncoder(w).Encode(resp) + }) + + // Serve Simple API as fallback + mux.HandleFunc("/simple/"+name+"/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + html := "\n" + for _, u := range pkg.URLs { + html += `` + u.Filename + "\n" + } + html += "" + w.Write([]byte(html)) + }) + } + return 
httptest.NewServer(mux) +} + +func TestResolveWithMockJSON(t *testing.T) { + server := servePyPIJSON(t, map[string]pypiPackageJSON{ + "flask": { + Info: pypiInfo{ + Name: "Flask", + Version: "3.0.0", + }, + URLs: []pypiURL{{ + Filename: "Flask-3.0.0-py3-none-any.whl", + URL: "https://files.example.com/Flask-3.0.0-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "abc123"}, + }}, + }, + }) defer server.Close() + // Override the JSON API base for the test + origBase := pypiJSONBase + defer func() { pypiJSONBaseOverride = ""; _ = origBase }() + pypiJSONBaseOverride = server.URL + "/pypi/" + specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) if err != nil { @@ -170,7 +242,6 @@ func TestResolveWithMockServer(t *testing.T) { if len(resolved) != 1 { t.Fatalf("expected 1 resolved package, got %d", len(resolved)) } - if resolved[0].Name != "flask" { t.Errorf("Name = %q, want %q", resolved[0].Name, "flask") } @@ -181,3 +252,118 @@ func TestResolveWithMockServer(t *testing.T) { t.Errorf("Checksum = %q, want %q", resolved[0].Checksum, "sha256:abc123") } } + +func TestResolveTransitiveDeps(t *testing.T) { + server := servePyPIJSON(t, map[string]pypiPackageJSON{ + "flask": { + Info: pypiInfo{ + Name: "Flask", + Version: "3.0.0", + RequiresDist: []string{ + "Werkzeug>=3.0.0", + "click>=8.0", + "devtools; extra == \"dev\"", + }, + }, + URLs: []pypiURL{{ + Filename: "Flask-3.0.0-py3-none-any.whl", + URL: "https://files.example.com/Flask-3.0.0-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "aaa"}, + }}, + }, + "werkzeug": { + Info: pypiInfo{ + Name: "Werkzeug", + Version: "3.0.1", + RequiresDist: []string{ + "MarkupSafe>=2.1.1", + }, + }, + URLs: []pypiURL{{ + Filename: "Werkzeug-3.0.1-py3-none-any.whl", + URL: 
"https://files.example.com/Werkzeug-3.0.1-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "bbb"}, + }}, + }, + "click": { + Info: pypiInfo{ + Name: "click", + Version: "8.1.7", + }, + URLs: []pypiURL{{ + Filename: "click-8.1.7-py3-none-any.whl", + URL: "https://files.example.com/click-8.1.7-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "ccc"}, + }}, + }, + "markupsafe": { + Info: pypiInfo{ + Name: "MarkupSafe", + Version: "2.1.5", + }, + URLs: []pypiURL{{ + Filename: "MarkupSafe-2.1.5-py3-none-any.whl", + URL: "https://files.example.com/MarkupSafe-2.1.5-py3-none-any.whl", + PackageType: "bdist_wheel", + Digests: pypiDigests{SHA256: "ddd"}, + }}, + }, + }) + defer server.Close() + + pypiJSONBaseOverride = server.URL + "/pypi/" + defer func() { pypiJSONBaseOverride = "" }() + + specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + names := map[string]bool{} + for _, pkg := range resolved { + names[normalizeName(pkg.Name)] = true + } + + for _, want := range []string{"flask", "werkzeug", "click", "markupsafe"} { + if !names[want] { + t.Errorf("missing transitive dependency: %s (resolved: %v)", want, names) + } + } + if names["devtools"] { + t.Error("should NOT include devtools (gated on extra)") + } + if len(resolved) != 4 { + t.Errorf("expected 4 resolved packages, got %d: %v", len(resolved), names) + } +} + +func TestResolveSimpleApiFallback(t *testing.T) { + // Test that non-PyPI indexes use the Simple API + mux := http.NewServeMux() + mux.HandleFunc("/simple/mypackage/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.Write([]byte(` +mypackage-1.0.0-py3-none-any.whl +`)) + }) + server := httptest.NewServer(mux) + defer 
server.Close() + + specs := []packageSpec{{Name: "mypackage", Operator: "==", Version: "1.0.0"}} + // Use a non-pypi index so it doesn't try the JSON API + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + if err != nil { + t.Fatalf("resolvePackages() error: %v", err) + } + + if len(resolved) != 1 { + t.Fatalf("expected 1 resolved package, got %d", len(resolved)) + } + if resolved[0].Version != "1.0.0" { + t.Errorf("Version = %q, want %q", resolved[0].Version, "1.0.0") + } +} diff --git a/pkg/ecosystem/pip/wheel.go b/pkg/ecosystem/pip/wheel.go index 32d396da8..51f3f7df7 100644 --- a/pkg/ecosystem/pip/wheel.go +++ b/pkg/ecosystem/pip/wheel.go @@ -89,6 +89,244 @@ func writeInstallerFile(fsys apkfs.FullFS, sitePackagesPath string, wheelData [] return nil } +// readMetadata reads the METADATA file from a wheel and returns its contents. +func readMetadata(wheelData []byte) (string, error) { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return "", err + } + + for _, f := range reader.File { + if strings.HasSuffix(f.Name, ".dist-info/METADATA") { + rc, err := f.Open() + if err != nil { + return "", err + } + data, err := io.ReadAll(rc) + rc.Close() + if err != nil { + return "", err + } + return string(data), nil + } + } + + return "", fmt.Errorf("METADATA not found in wheel") +} + +// parseRequiresDist extracts Requires-Dist entries from wheel METADATA content. +// Returns parsed package specs, filtering out entries with unsatisfiable markers. 
+func parseRequiresDist(metadata string, extras []string) []packageSpec { + var deps []packageSpec + for _, line := range strings.Split(metadata, "\n") { + line = strings.TrimSpace(line) + if !strings.HasPrefix(line, "Requires-Dist:") { + continue + } + req := strings.TrimSpace(strings.TrimPrefix(line, "Requires-Dist:")) + spec := parsePackageSpec(req) + + // Skip deps gated on extras we didn't request + if spec.Markers != "" && !evaluateMarkers(spec.Markers, extras) { + continue + } + + deps = append(deps, spec) + } + return deps +} + +// evaluateMarkers performs a simplified evaluation of PEP 508 environment markers. +// It handles the most common cases: +// - extra == "..." — only satisfied if the extra was requested +// - os_name, sys_platform, platform_system — always Linux +// - python_version — assumed satisfied (we already filtered wheels) +// - implementation_name — "cpython" +// +// For compound markers (and/or), we do best-effort evaluation. +func evaluateMarkers(markers string, requestedExtras []string) bool { + markers = strings.TrimSpace(markers) + + // Handle "or" — if any branch is true, the whole thing is true + if orParts := splitMarkerOr(markers); len(orParts) > 1 { + for _, part := range orParts { + if evaluateMarkers(part, requestedExtras) { + return true + } + } + return false + } + + // Handle "and" — all branches must be true + if andParts := splitMarkerAnd(markers); len(andParts) > 1 { + for _, part := range andParts { + if !evaluateMarkers(part, requestedExtras) { + return false + } + } + return true + } + + // Strip outer parens + markers = strings.TrimSpace(markers) + for strings.HasPrefix(markers, "(") && strings.HasSuffix(markers, ")") { + markers = strings.TrimSpace(markers[1 : len(markers)-1]) + } + + // Parse single comparison: key op value + key, op, value := parseMarkerExpr(markers) + if key == "" { + // Can't parse — be permissive, include the dep + return true + } + + switch key { + case "extra": + // Only include if the extra 
was explicitly requested + for _, e := range requestedExtras { + if matchMarkerOp(e, op, value) { + return true + } + } + return false + case "os_name": + return matchMarkerOp("posix", op, value) + case "sys_platform": + return matchMarkerOp("linux", op, value) + case "platform_system": + return matchMarkerOp("Linux", op, value) + case "implementation_name": + return matchMarkerOp("cpython", op, value) + case "python_version", "python_full_version", "platform_machine", + "platform_release", "platform_version", "implementation_version": + // Be permissive for version-related markers — we've already + // filtered wheels by Python version compatibility. + return true + default: + // Unknown marker — be permissive + return true + } +} + +// splitMarkerOr splits on " or " at the top level (not inside parens). +func splitMarkerOr(s string) []string { + return splitMarkerBool(s, " or ") +} + +// splitMarkerAnd splits on " and " at the top level (not inside parens). +func splitMarkerAnd(s string) []string { + return splitMarkerBool(s, " and ") +} + +func splitMarkerBool(s, sep string) []string { + var parts []string + depth := 0 + start := 0 + for i := 0; i < len(s); i++ { + switch s[i] { + case '(': + depth++ + case ')': + depth-- + default: + if depth == 0 && i+len(sep) <= len(s) && s[i:i+len(sep)] == sep { + parts = append(parts, strings.TrimSpace(s[start:i])) + start = i + len(sep) + i += len(sep) - 1 + } + } + } + parts = append(parts, strings.TrimSpace(s[start:])) + if len(parts) == 1 && parts[0] == s { + return parts + } + return parts +} + +// parseMarkerExpr parses "key op 'value'" or "'value' op key". 
// It returns the marker variable as key, the operator as op (surrounding
// whitespace removed) and the unquoted literal as value. When the variable is
// on the right-hand side the operator is flipped (see flipOp) so the result
// always reads "key op value". All three results are empty when no operator
// is found.
func parseMarkerExpr(expr string) (key, op, value string) {
	expr = strings.TrimSpace(expr)

	// Try patterns like: extra == "dev" or "linux" == sys_platform.
	// " not in " must be tried before " in " because every "not in"
	// expression also contains " in ".
	for _, operator := range []string{"===", "~=", "==", "!=", ">=", "<=", ">", "<", " not in ", " in "} {
		idx := strings.Index(expr, operator)
		if idx < 0 {
			continue
		}
		lhs := strings.TrimSpace(expr[:idx])
		rhs := strings.TrimSpace(expr[idx+len(operator):])

		lhs = stripQuotes(lhs)
		rhs = stripQuotes(rhs)

		// Figure out which side is the key vs the value.
		if isMarkerVar(lhs) {
			return lhs, strings.TrimSpace(operator), rhs
		}
		if isMarkerVar(rhs) {
			return rhs, flipOp(strings.TrimSpace(operator)), lhs
		}
		// Both look like values — treat lhs as key.
		return lhs, strings.TrimSpace(operator), rhs
	}
	return "", "", ""
}

// stripQuotes removes one pair of matching single or double quotes that
// surrounds s; any other input is returned unchanged.
func stripQuotes(s string) string {
	if len(s) >= 2 && ((s[0] == '"' && s[len(s)-1] == '"') || (s[0] == '\'' && s[len(s)-1] == '\'')) {
		return s[1 : len(s)-1]
	}
	return s
}

// isMarkerVar reports whether s is one of the environment marker variable
// names recognized by this package.
func isMarkerVar(s string) bool {
	switch s {
	case "os_name", "sys_platform", "platform_machine", "platform_python_implementation",
		"platform_release", "platform_system", "platform_version",
		"python_version", "python_full_version", "implementation_name",
		"implementation_version", "extra":
		return true
	}
	return false
}

// flipOp returns the operator to use after swapping the two operands of a
// comparison. Ordering operators are mirrored. "in" and "not in" become the
// internal operators "contains" and "not contains", because `'x' in var`
// asks whether the variable's value contains the literal. Symmetric
// operators are returned unchanged.
func flipOp(op string) string {
	switch op {
	case ">":
		return "<"
	case "<":
		return ">"
	case ">=":
		return "<="
	case "<=":
		return ">="
	case "in":
		return "contains"
	case "not in":
		return "not contains"
	}
	return op
}

// matchMarkerOp evaluates "actual op expected", where actual is the value of
// a marker variable in the target environment and expected is the literal
// from the marker. Ordering operators compare the strings lexically.
// Operators that are not recognized (for example "~=") are treated as
// satisfied.
func matchMarkerOp(actual, op, expected string) bool {
	switch op {
	case "==", "===":
		return actual == expected
	case "!=":
		return actual != expected
	case "in":
		return strings.Contains(expected, actual)
	case "not in":
		return !strings.Contains(expected, actual)
	case "contains":
		// Produced by flipOp for `'literal' in variable`.
		return strings.Contains(actual, expected)
	case "not contains":
		return !strings.Contains(actual, expected)
	case ">=":
		return actual >= expected
	case "<=":
		return actual <= expected
	case ">":
		return actual > expected
	case "<":
		return actual < expected
	default:
		return true
	}
}

// verifyChecksum
verifies the SHA256 checksum of data against the expected value. func verifyChecksum(data []byte, expected string) error { if expected == "" { diff --git a/pkg/ecosystem/pip/wheel_test.go b/pkg/ecosystem/pip/wheel_test.go index f16fac12f..5518c3723 100644 --- a/pkg/ecosystem/pip/wheel_test.go +++ b/pkg/ecosystem/pip/wheel_test.go @@ -19,6 +19,7 @@ import ( "bytes" "crypto/sha256" "encoding/hex" + "strings" "testing" apkfs "chainguard.dev/apko/pkg/apk/fs" @@ -104,6 +105,132 @@ func TestWriteInstallerFile(t *testing.T) { } } +func TestReadMetadata(t *testing.T) { + wheelData := createTestWheel(t, map[string]string{ + "flask-3.0.0.dist-info/METADATA": `Metadata-Version: 2.1 +Name: Flask +Version: 3.0.0 +Requires-Dist: Werkzeug>=3.0.0 +Requires-Dist: Jinja2>=3.1.2 +Requires-Dist: itsdangerous>=2.1.2 +Requires-Dist: click>=8.1.3 +Requires-Dist: blinker>=1.6.2 +Requires-Dist: importlib-metadata>=3.6.0; python_version < "3.10" +Requires-Dist: async-timeout>=4.0.3; extra == "async" +`, + }) + + metadata, err := readMetadata(wheelData) + if err != nil { + t.Fatalf("readMetadata() error: %v", err) + } + if !strings.Contains(metadata, "Requires-Dist: Werkzeug>=3.0.0") { + t.Error("metadata should contain Werkzeug requirement") + } +} + +func TestParseRequiresDist(t *testing.T) { + metadata := `Metadata-Version: 2.1 +Name: Flask +Version: 3.0.0 +Requires-Dist: Werkzeug>=3.0.0 +Requires-Dist: Jinja2>=3.1.2 +Requires-Dist: click>=8.1.3 +Requires-Dist: importlib-metadata>=3.6.0; python_version < "3.10" +Requires-Dist: async-timeout>=4.0.3; extra == "async" +Requires-Dist: pytest; extra == "test" +` + // Without extras — should get runtime deps only, not extra-gated ones + deps := parseRequiresDist(metadata, nil) + + names := map[string]bool{} + for _, d := range deps { + names[d.Name] = true + } + + if !names["Werkzeug"] { + t.Error("should include Werkzeug") + } + if !names["Jinja2"] { + t.Error("should include Jinja2") + } + if !names["click"] { + t.Error("should include 
click") + } + // python_version markers are permissively included + if !names["importlib-metadata"] { + t.Error("should include importlib-metadata (python_version marker is permissive)") + } + // extra-gated deps should be excluded + if names["async-timeout"] { + t.Error("should NOT include async-timeout (gated on extra)") + } + if names["pytest"] { + t.Error("should NOT include pytest (gated on extra)") + } +} + +func TestParseRequiresDistWithExtras(t *testing.T) { + metadata := `Metadata-Version: 2.1 +Name: Flask +Version: 3.0.0 +Requires-Dist: Werkzeug>=3.0.0 +Requires-Dist: async-timeout>=4.0.3; extra == "async" +Requires-Dist: pytest; extra == "test" +` + deps := parseRequiresDist(metadata, []string{"async"}) + + names := map[string]bool{} + for _, d := range deps { + names[d.Name] = true + } + + if !names["Werkzeug"] { + t.Error("should include Werkzeug") + } + if !names["async-timeout"] { + t.Error("should include async-timeout (async extra requested)") + } + if names["pytest"] { + t.Error("should NOT include pytest (test extra not requested)") + } +} + +func TestEvaluateMarkers(t *testing.T) { + tests := []struct { + name string + markers string + extras []string + want bool + }{ + {"no markers", "", nil, true}, + {"extra not requested", `extra == "dev"`, nil, false}, + {"extra requested", `extra == "dev"`, []string{"dev"}, true}, + {"wrong extra", `extra == "dev"`, []string{"test"}, false}, + {"os_name posix", `os_name == "posix"`, nil, true}, + {"os_name nt", `os_name == "nt"`, nil, false}, + {"sys_platform linux", `sys_platform == "linux"`, nil, true}, + {"sys_platform win32", `sys_platform == "win32"`, nil, false}, + {"platform_system Linux", `platform_system == "Linux"`, nil, true}, + {"python_version", `python_version >= "3.8"`, nil, true}, + {"compound and true", `python_version >= "3.8" and os_name == "posix"`, nil, true}, + {"compound and false", `os_name == "nt" and python_version >= "3.8"`, nil, false}, + {"compound or true", `os_name == "nt" or 
os_name == "posix"`, nil, true}, + {"compound or false", `os_name == "nt" or sys_platform == "win32"`, nil, false}, + {"extra and platform", `extra == "dev" and os_name == "posix"`, []string{"dev"}, true}, + {"extra and wrong platform", `extra == "dev" and os_name == "nt"`, []string{"dev"}, false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := evaluateMarkers(tt.markers, tt.extras) + if got != tt.want { + t.Errorf("evaluateMarkers(%q, %v) = %v, want %v", tt.markers, tt.extras, got, tt.want) + } + }) + } +} + func TestVerifyChecksum(t *testing.T) { data := []byte("hello world") h := sha256.Sum256(data) From b7fe42f85a622fa6c9d421a0f02a08f167075c3b Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Fri, 3 Apr 2026 14:08:12 -0600 Subject: [PATCH 03/12] refactor: rename ecosystem/pip to ecosystem/python Rename the package directory and Go package from "pip" to "python" to match the ecosystem name used in YAML config. Update all import paths and log messages accordingly. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/cli/lock.go | 2 +- pkg/build/build_implementation.go | 2 +- pkg/ecosystem/ecosystem.go | 2 +- pkg/ecosystem/{pip => python}/platform.go | 2 +- pkg/ecosystem/{pip => python}/platform_test.go | 2 +- pkg/ecosystem/{pip/pip.go => python/python.go} | 6 +++--- pkg/ecosystem/{pip/pip_test.go => python/python_test.go} | 2 +- pkg/ecosystem/{pip => python}/resolve.go | 2 +- pkg/ecosystem/{pip => python}/resolve_test.go | 2 +- pkg/ecosystem/{pip => python}/wheel.go | 2 +- pkg/ecosystem/{pip => python}/wheel_test.go | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) rename pkg/ecosystem/{pip => python}/platform.go (99%) rename pkg/ecosystem/{pip => python}/platform_test.go (99%) rename pkg/ecosystem/{pip/pip.go => python/python.go} (95%) rename pkg/ecosystem/{pip/pip_test.go => python/python_test.go} (99%) rename pkg/ecosystem/{pip => python}/resolve.go (99%) rename pkg/ecosystem/{pip => python}/resolve_test.go (99%) rename pkg/ecosystem/{pip => python}/wheel.go (99%) rename pkg/ecosystem/{pip => python}/wheel_test.go (99%) diff --git a/internal/cli/lock.go b/internal/cli/lock.go index c05348843..22d46382f 100644 --- a/internal/cli/lock.go +++ b/internal/cli/lock.go @@ -36,7 +36,7 @@ import ( "chainguard.dev/apko/pkg/build" "chainguard.dev/apko/pkg/build/types" "chainguard.dev/apko/pkg/ecosystem" - _ "chainguard.dev/apko/pkg/ecosystem/pip" + _ "chainguard.dev/apko/pkg/ecosystem/python" pkglock "chainguard.dev/apko/pkg/lock" ) diff --git a/pkg/build/build_implementation.go b/pkg/build/build_implementation.go index 63f702835..6e086b8e5 100644 --- a/pkg/build/build_implementation.go +++ b/pkg/build/build_implementation.go @@ -37,7 +37,7 @@ import ( "chainguard.dev/apko/pkg/apk/apk" apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/ecosystem" - _ "chainguard.dev/apko/pkg/ecosystem/pip" // Register pip ecosystem installer. 
+ _ "chainguard.dev/apko/pkg/ecosystem/python" // Register python ecosystem installer. "chainguard.dev/apko/pkg/lock" "chainguard.dev/apko/pkg/options" ) diff --git a/pkg/ecosystem/ecosystem.go b/pkg/ecosystem/ecosystem.go index ce12200a2..716411ca5 100644 --- a/pkg/ecosystem/ecosystem.go +++ b/pkg/ecosystem/ecosystem.go @@ -35,7 +35,7 @@ type ResolvedPackage struct { // Installer is the interface that ecosystem package installers must implement. type Installer interface { - // Name returns the ecosystem name (e.g., "pip"). + // Name returns the ecosystem name (e.g., "python"). Name() string // Resolve resolves the requested packages to specific versions and URLs. Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture) ([]ResolvedPackage, error) diff --git a/pkg/ecosystem/pip/platform.go b/pkg/ecosystem/python/platform.go similarity index 99% rename from pkg/ecosystem/pip/platform.go rename to pkg/ecosystem/python/platform.go index 54d1c7c14..c1e9dd06b 100644 --- a/pkg/ecosystem/pip/platform.go +++ b/pkg/ecosystem/python/platform.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package pip +package python import ( "fmt" diff --git a/pkg/ecosystem/pip/platform_test.go b/pkg/ecosystem/python/platform_test.go similarity index 99% rename from pkg/ecosystem/pip/platform_test.go rename to pkg/ecosystem/python/platform_test.go index e96edd4d7..7edd62ed9 100644 --- a/pkg/ecosystem/pip/platform_test.go +++ b/pkg/ecosystem/python/platform_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package pip +package python import ( "testing" diff --git a/pkg/ecosystem/pip/pip.go b/pkg/ecosystem/python/python.go similarity index 95% rename from pkg/ecosystem/pip/pip.go rename to pkg/ecosystem/python/python.go index c5cd4487a..47cbfa6ed 100644 --- a/pkg/ecosystem/pip/pip.go +++ b/pkg/ecosystem/python/python.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package pip +package python import ( "context" @@ -70,7 +70,7 @@ func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []e if pythonVersion == "" { return fmt.Errorf("no Python installation found in filesystem; install python3 via APK first") } - log.Infof("detected Python %s for pip ecosystem install", pythonVersion) + log.Infof("detected Python %s for python ecosystem install", pythonVersion) sitePackagesPath := fmt.Sprintf("usr/lib/python%s/site-packages", pythonVersion) if err := fsys.MkdirAll(sitePackagesPath, 0755); err != nil { @@ -78,7 +78,7 @@ func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []e } for _, pkg := range packages { - log.Infof("installing pip package %s==%s", pkg.Name, pkg.Version) + log.Infof("installing python package %s==%s", pkg.Name, pkg.Version) data, err := downloadWheel(ctx, pkg.URL) if err != nil { diff --git a/pkg/ecosystem/pip/pip_test.go b/pkg/ecosystem/python/python_test.go similarity index 99% rename from pkg/ecosystem/pip/pip_test.go rename to pkg/ecosystem/python/python_test.go index 514c00c2c..af966bfa1 100644 --- a/pkg/ecosystem/pip/pip_test.go +++ b/pkg/ecosystem/python/python_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package pip +package python import ( "testing" diff --git a/pkg/ecosystem/pip/resolve.go b/pkg/ecosystem/python/resolve.go similarity index 99% rename from pkg/ecosystem/pip/resolve.go rename to pkg/ecosystem/python/resolve.go index af88a79ad..f6205193b 100644 --- a/pkg/ecosystem/pip/resolve.go +++ b/pkg/ecosystem/python/resolve.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package pip +package python import ( "context" diff --git a/pkg/ecosystem/pip/resolve_test.go b/pkg/ecosystem/python/resolve_test.go similarity index 99% rename from pkg/ecosystem/pip/resolve_test.go rename to pkg/ecosystem/python/resolve_test.go index 08ec3052b..8f24b113b 100644 --- a/pkg/ecosystem/pip/resolve_test.go +++ b/pkg/ecosystem/python/resolve_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package pip +package python import ( "context" diff --git a/pkg/ecosystem/pip/wheel.go b/pkg/ecosystem/python/wheel.go similarity index 99% rename from pkg/ecosystem/pip/wheel.go rename to pkg/ecosystem/python/wheel.go index 51f3f7df7..4ce9be3b1 100644 --- a/pkg/ecosystem/pip/wheel.go +++ b/pkg/ecosystem/python/wheel.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package pip +package python import ( "archive/zip" diff --git a/pkg/ecosystem/pip/wheel_test.go b/pkg/ecosystem/python/wheel_test.go similarity index 99% rename from pkg/ecosystem/pip/wheel_test.go rename to pkg/ecosystem/python/wheel_test.go index 5518c3723..b5837a482 100644 --- a/pkg/ecosystem/pip/wheel_test.go +++ b/pkg/ecosystem/python/wheel_test.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package pip +package python import ( "archive/zip" From 99ca4cd8d190ec7836d604b64989732ae5cb0cfd Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Fri, 3 Apr 2026 14:10:33 -0600 Subject: [PATCH 04/12] cleanup: remove dead wheel METADATA parsing code Remove readMetadata and parseRequiresDist, which are no longer used after switching to the PyPI JSON API for dependency discovery. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/ecosystem/python/wheel.go | 47 --------------- pkg/ecosystem/python/wheel_test.go | 92 ------------------------------ 2 files changed, 139 deletions(-) diff --git a/pkg/ecosystem/python/wheel.go b/pkg/ecosystem/python/wheel.go index 4ce9be3b1..6ade0eeb5 100644 --- a/pkg/ecosystem/python/wheel.go +++ b/pkg/ecosystem/python/wheel.go @@ -89,53 +89,6 @@ func writeInstallerFile(fsys apkfs.FullFS, sitePackagesPath string, wheelData [] return nil } -// readMetadata reads the METADATA file from a wheel and returns its contents. -func readMetadata(wheelData []byte) (string, error) { - reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) - if err != nil { - return "", err - } - - for _, f := range reader.File { - if strings.HasSuffix(f.Name, ".dist-info/METADATA") { - rc, err := f.Open() - if err != nil { - return "", err - } - data, err := io.ReadAll(rc) - rc.Close() - if err != nil { - return "", err - } - return string(data), nil - } - } - - return "", fmt.Errorf("METADATA not found in wheel") -} - -// parseRequiresDist extracts Requires-Dist entries from wheel METADATA content. -// Returns parsed package specs, filtering out entries with unsatisfiable markers. 
-func parseRequiresDist(metadata string, extras []string) []packageSpec { - var deps []packageSpec - for _, line := range strings.Split(metadata, "\n") { - line = strings.TrimSpace(line) - if !strings.HasPrefix(line, "Requires-Dist:") { - continue - } - req := strings.TrimSpace(strings.TrimPrefix(line, "Requires-Dist:")) - spec := parsePackageSpec(req) - - // Skip deps gated on extras we didn't request - if spec.Markers != "" && !evaluateMarkers(spec.Markers, extras) { - continue - } - - deps = append(deps, spec) - } - return deps -} - // evaluateMarkers performs a simplified evaluation of PEP 508 environment markers. // It handles the most common cases: // - extra == "..." — only satisfied if the extra was requested diff --git a/pkg/ecosystem/python/wheel_test.go b/pkg/ecosystem/python/wheel_test.go index b5837a482..4d87c8a4a 100644 --- a/pkg/ecosystem/python/wheel_test.go +++ b/pkg/ecosystem/python/wheel_test.go @@ -19,7 +19,6 @@ import ( "bytes" "crypto/sha256" "encoding/hex" - "strings" "testing" apkfs "chainguard.dev/apko/pkg/apk/fs" @@ -105,97 +104,6 @@ func TestWriteInstallerFile(t *testing.T) { } } -func TestReadMetadata(t *testing.T) { - wheelData := createTestWheel(t, map[string]string{ - "flask-3.0.0.dist-info/METADATA": `Metadata-Version: 2.1 -Name: Flask -Version: 3.0.0 -Requires-Dist: Werkzeug>=3.0.0 -Requires-Dist: Jinja2>=3.1.2 -Requires-Dist: itsdangerous>=2.1.2 -Requires-Dist: click>=8.1.3 -Requires-Dist: blinker>=1.6.2 -Requires-Dist: importlib-metadata>=3.6.0; python_version < "3.10" -Requires-Dist: async-timeout>=4.0.3; extra == "async" -`, - }) - - metadata, err := readMetadata(wheelData) - if err != nil { - t.Fatalf("readMetadata() error: %v", err) - } - if !strings.Contains(metadata, "Requires-Dist: Werkzeug>=3.0.0") { - t.Error("metadata should contain Werkzeug requirement") - } -} - -func TestParseRequiresDist(t *testing.T) { - metadata := `Metadata-Version: 2.1 -Name: Flask -Version: 3.0.0 -Requires-Dist: Werkzeug>=3.0.0 -Requires-Dist: 
Jinja2>=3.1.2 -Requires-Dist: click>=8.1.3 -Requires-Dist: importlib-metadata>=3.6.0; python_version < "3.10" -Requires-Dist: async-timeout>=4.0.3; extra == "async" -Requires-Dist: pytest; extra == "test" -` - // Without extras — should get runtime deps only, not extra-gated ones - deps := parseRequiresDist(metadata, nil) - - names := map[string]bool{} - for _, d := range deps { - names[d.Name] = true - } - - if !names["Werkzeug"] { - t.Error("should include Werkzeug") - } - if !names["Jinja2"] { - t.Error("should include Jinja2") - } - if !names["click"] { - t.Error("should include click") - } - // python_version markers are permissively included - if !names["importlib-metadata"] { - t.Error("should include importlib-metadata (python_version marker is permissive)") - } - // extra-gated deps should be excluded - if names["async-timeout"] { - t.Error("should NOT include async-timeout (gated on extra)") - } - if names["pytest"] { - t.Error("should NOT include pytest (gated on extra)") - } -} - -func TestParseRequiresDistWithExtras(t *testing.T) { - metadata := `Metadata-Version: 2.1 -Name: Flask -Version: 3.0.0 -Requires-Dist: Werkzeug>=3.0.0 -Requires-Dist: async-timeout>=4.0.3; extra == "async" -Requires-Dist: pytest; extra == "test" -` - deps := parseRequiresDist(metadata, []string{"async"}) - - names := map[string]bool{} - for _, d := range deps { - names[d.Name] = true - } - - if !names["Werkzeug"] { - t.Error("should include Werkzeug") - } - if !names["async-timeout"] { - t.Error("should include async-timeout (async extra requested)") - } - if names["pytest"] { - t.Error("should NOT include pytest (test extra not requested)") - } -} - func TestEvaluateMarkers(t *testing.T) { tests := []struct { name string From 69079491fa9ce154eb85f693c10ed3d96cc6ee4a Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Fri, 3 Apr 2026 14:43:56 -0600 Subject: [PATCH 05/12] feat: add virtual environment support with automatic PATH/VIRTUAL_ENV When `venv` is set in the python 
ecosystem config, packages are installed into a virtual environment with proper pyvenv.cfg and bin/python symlinks. The image environment is automatically configured with VIRTUAL_ENV and PATH prepended with the venv bin directory. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/build/build_implementation.go | 13 ++++- pkg/build/types/image_configuration.go | 3 + pkg/build/types/types.go | 6 +- pkg/ecosystem/ecosystem.go | 21 ++++--- pkg/ecosystem/python/python.go | 79 +++++++++++++++++++++++--- pkg/ecosystem/python/python_test.go | 49 ++++++++++++++++ 6 files changed, 152 insertions(+), 19 deletions(-) diff --git a/pkg/build/build_implementation.go b/pkg/build/build_implementation.go index 6e086b8e5..68794e454 100644 --- a/pkg/build/build_implementation.go +++ b/pkg/build/build_implementation.go @@ -179,12 +179,21 @@ func (bc *Context) buildImage(ctx context.Context) ([]apk.InstalledDiff, error) } } - // Install ecosystem packages (pip, etc.) after APK packages so that + // Install ecosystem packages (python, etc.) after APK packages so that // the language runtime is available for version detection. if len(bc.ic.Contents.Ecosystems) > 0 { - if err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch); err != nil { + env, err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch) + if err != nil { return nil, fmt.Errorf("installing ecosystem packages: %w", err) } + if len(env) > 0 { + if bc.ic.Environment == nil { + bc.ic.Environment = make(map[string]string) + } + for k, v := range env { + bc.ic.Environment[k] = v + } + } } // For now adding additional accounts is banned when using base image. 
On the other hand, we don't want to diff --git a/pkg/build/types/image_configuration.go b/pkg/build/types/image_configuration.go index cb744668b..04b9e6a8e 100644 --- a/pkg/build/types/image_configuration.go +++ b/pkg/build/types/image_configuration.go @@ -193,6 +193,9 @@ func (i *ImageContents) MergeInto(target *ImageContents) error { if existing.PythonVersion == "" { existing.PythonVersion = eco.PythonVersion } + if existing.Venv == "" { + existing.Venv = eco.Venv + } target.Ecosystems[name] = existing } else { target.Ecosystems[name] = eco diff --git a/pkg/build/types/types.go b/pkg/build/types/types.go index b569041f6..357512c7a 100644 --- a/pkg/build/types/types.go +++ b/pkg/build/types/types.go @@ -104,7 +104,7 @@ type BaseImageDescriptor struct { APKIndex string `json:"apkindex,omitempty" yaml:"apkindex,omitempty"` } -// EcosystemConfig holds configuration for a non-APK package ecosystem (e.g., pip). +// EcosystemConfig holds configuration for a non-APK package ecosystem (e.g., python). type EcosystemConfig struct { // Indexes is a list of package index URLs (e.g., PyPI simple API URLs). Indexes []string `json:"indexes,omitempty" yaml:"indexes,omitempty"` @@ -112,6 +112,10 @@ type EcosystemConfig struct { Packages []string `json:"packages,omitempty" yaml:"packages,omitempty"` // PythonVersion overrides auto-detection of the Python version (e.g., "3.12"). PythonVersion string `json:"python_version,omitempty" yaml:"python_version,omitempty"` + // Venv is an optional path for a virtual environment (e.g., "/app/venv"). + // When set, packages are installed into the venv instead of the system site-packages, + // and VIRTUAL_ENV / PATH are set automatically. 
+ Venv string `json:"venv,omitempty" yaml:"venv,omitempty"` } type ImageContents struct { diff --git a/pkg/ecosystem/ecosystem.go b/pkg/ecosystem/ecosystem.go index 716411ca5..cc475e07b 100644 --- a/pkg/ecosystem/ecosystem.go +++ b/pkg/ecosystem/ecosystem.go @@ -40,7 +40,8 @@ type Installer interface { // Resolve resolves the requested packages to specific versions and URLs. Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture) ([]ResolvedPackage, error) // Install extracts resolved packages into the filesystem. - Install(ctx context.Context, fs apkfs.FullFS, packages []ResolvedPackage) error + // Returns environment variables that should be set in the image configuration. + Install(ctx context.Context, fs apkfs.FullFS, packages []ResolvedPackage, config types.EcosystemConfig) (map[string]string, error) } var ( @@ -67,22 +68,28 @@ func Get(name string) (Installer, bool) { } // InstallAll installs packages for all configured ecosystems. -func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture) error { +// Returns environment variables that should be set in the image configuration. 
+func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture) (map[string]string, error) { + env := map[string]string{} for name, config := range ecosystems { installer, ok := Get(name) if !ok { - return fmt.Errorf("unknown ecosystem: %s", name) + return nil, fmt.Errorf("unknown ecosystem: %s", name) } resolved, err := installer.Resolve(ctx, config, arch) if err != nil { - return fmt.Errorf("resolving %s packages: %w", name, err) + return nil, fmt.Errorf("resolving %s packages: %w", name, err) } if len(resolved) == 0 { continue } - if err := installer.Install(ctx, fs, resolved); err != nil { - return fmt.Errorf("installing %s packages: %w", name, err) + vars, err := installer.Install(ctx, fs, resolved, config) + if err != nil { + return nil, fmt.Errorf("installing %s packages: %w", name, err) + } + for k, v := range vars { + env[k] = v } } - return nil + return env, nil } diff --git a/pkg/ecosystem/python/python.go b/pkg/ecosystem/python/python.go index 47cbfa6ed..d4d5320c9 100644 --- a/pkg/ecosystem/python/python.go +++ b/pkg/ecosystem/python/python.go @@ -19,6 +19,7 @@ import ( "fmt" "io" "net/http" + "path/filepath" "strings" "github.com/chainguard-dev/clog" @@ -53,8 +54,6 @@ func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, a indexes = []string{defaultIndex} } - // We need a Python version to filter wheels. We'll use a default that - // callers can override via the config, or detect later during install. 
pythonVersion := config.PythonVersion if pythonVersion == "" { pythonVersion = "3.12" @@ -63,18 +62,29 @@ func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, a return resolvePackages(ctx, specs, indexes, pythonVersion, arch) } -func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []ecosystem.ResolvedPackage) error { +func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []ecosystem.ResolvedPackage, config types.EcosystemConfig) (map[string]string, error) { log := clog.FromContext(ctx) pythonVersion := detectPythonVersion(fsys) if pythonVersion == "" { - return fmt.Errorf("no Python installation found in filesystem; install python3 via APK first") + return nil, fmt.Errorf("no Python installation found in filesystem; install python3 via APK first") } log.Infof("detected Python %s for python ecosystem install", pythonVersion) - sitePackagesPath := fmt.Sprintf("usr/lib/python%s/site-packages", pythonVersion) + var sitePackagesPath string + if config.Venv != "" { + venvPath := strings.TrimPrefix(config.Venv, "/") + if err := createVenv(fsys, venvPath, pythonVersion); err != nil { + return nil, fmt.Errorf("creating virtual environment at %s: %w", config.Venv, err) + } + sitePackagesPath = filepath.Join(venvPath, "lib", "python"+pythonVersion, "site-packages") + log.Infof("using virtual environment at %s", config.Venv) + } else { + sitePackagesPath = fmt.Sprintf("usr/lib/python%s/site-packages", pythonVersion) + } + if err := fsys.MkdirAll(sitePackagesPath, 0755); err != nil { - return fmt.Errorf("creating site-packages directory: %w", err) + return nil, fmt.Errorf("creating site-packages directory: %w", err) } for _, pkg := range packages { @@ -82,15 +92,15 @@ func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []e data, err := downloadWheel(ctx, pkg.URL) if err != nil { - return fmt.Errorf("downloading %s: %w", pkg.Name, err) + return nil, fmt.Errorf("downloading %s: %w", 
pkg.Name, err) } if err := verifyChecksum(data, pkg.Checksum); err != nil { - return fmt.Errorf("verifying %s: %w", pkg.Name, err) + return nil, fmt.Errorf("verifying %s: %w", pkg.Name, err) } if err := extractWheel(fsys, data, sitePackagesPath); err != nil { - return fmt.Errorf("extracting %s: %w", pkg.Name, err) + return nil, fmt.Errorf("extracting %s: %w", pkg.Name, err) } if err := writeInstallerFile(fsys, sitePackagesPath, data); err != nil { @@ -98,6 +108,57 @@ func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []e } } + // When using a venv, set VIRTUAL_ENV and prepend its bin/ to PATH. + if config.Venv != "" { + venvBin := filepath.Join(config.Venv, "bin") + return map[string]string{ + "VIRTUAL_ENV": config.Venv, + "PATH": venvBin + ":/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + }, nil + } + + return nil, nil +} + +// createVenv sets up a virtual environment directory structure. +func createVenv(fsys apkfs.FullFS, venvPath, pythonVersion string) error { + // Create directory structure + dirs := []string{ + filepath.Join(venvPath, "bin"), + filepath.Join(venvPath, "include"), + filepath.Join(venvPath, "lib", "python"+pythonVersion, "site-packages"), + } + for _, dir := range dirs { + if err := fsys.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("creating %s: %w", dir, err) + } + } + + // Write pyvenv.cfg + cfg := fmt.Sprintf( + "home = /usr/bin\ninclude-system-site-packages = false\nversion = %s\n", + pythonVersion, + ) + cfgPath := filepath.Join(venvPath, "pyvenv.cfg") + if err := fsys.WriteFile(cfgPath, []byte(cfg), 0644); err != nil { + return fmt.Errorf("writing pyvenv.cfg: %w", err) + } + + // Create symlinks in bin/ + pythonBin := "/usr/bin/python" + pythonVersion + binPath := filepath.Join(venvPath, "bin") + symlinks := map[string]string{ + "python": pythonBin, + "python3": pythonBin, + "python" + pythonVersion: pythonBin, + } + for name, target := range symlinks { + linkPath := 
filepath.Join(binPath, name) + if err := fsys.Symlink(target, linkPath); err != nil { + return fmt.Errorf("creating symlink %s: %w", linkPath, err) + } + } + return nil } diff --git a/pkg/ecosystem/python/python_test.go b/pkg/ecosystem/python/python_test.go index af966bfa1..dd85cb305 100644 --- a/pkg/ecosystem/python/python_test.go +++ b/pkg/ecosystem/python/python_test.go @@ -21,6 +21,55 @@ import ( "chainguard.dev/apko/pkg/ecosystem" ) +func TestCreateVenv(t *testing.T) { + fs := apkfs.NewMemFS() + if err := fs.MkdirAll("usr/bin", 0755); err != nil { + t.Fatal(err) + } + + err := createVenv(fs, "app/venv", "3.12") + if err != nil { + t.Fatalf("createVenv() error: %v", err) + } + + // Check pyvenv.cfg + data, err := fs.ReadFile("app/venv/pyvenv.cfg") + if err != nil { + t.Fatalf("reading pyvenv.cfg: %v", err) + } + cfg := string(data) + if !contains(cfg, "home = /usr/bin") { + t.Errorf("pyvenv.cfg missing home, got: %q", cfg) + } + if !contains(cfg, "version = 3.12") { + t.Errorf("pyvenv.cfg missing version, got: %q", cfg) + } + + // Check directories exist + for _, dir := range []string{ + "app/venv/bin", + "app/venv/include", + "app/venv/lib/python3.12/site-packages", + } { + if _, err := fs.Stat(dir); err != nil { + t.Errorf("directory %s should exist: %v", dir, err) + } + } + + // Check symlinks + for _, name := range []string{"python", "python3", "python3.12"} { + target, err := fs.Readlink("app/venv/bin/" + name) + if err != nil { + t.Errorf("symlink %s should exist: %v", name, err) + continue + } + if target != "/usr/bin/python3.12" { + t.Errorf("symlink %s = %q, want %q", name, target, "/usr/bin/python3.12") + } + } +} + + func TestInstallerRegistration(t *testing.T) { inst, ok := ecosystem.Get("python") if !ok { From 2f0705e50cd1d5b14ada044c2f82f73c93ff5580 Mon Sep 17 00:00:00 2001 From: Justin Vreeland Date: Fri, 3 Apr 2026 16:06:46 -0700 Subject: [PATCH 06/12] feat: add per-package SBOMs, provenance URL threading, and transitive dep resolution Write 
SPDX 2.3 SBOMs into dist-info/sboms/sbom.spdx.json for Chainguard-sourced packages, enabling chainctl libraries verify to confirm provenance. Parse data-provenance and data-signature attributes from Simple API HTML and thread them through to ResolvedPackage. Add transitive dependency resolution for non-PyPI indexes by downloading wheels and parsing METADATA for Requires-Dist entries. Also fixes an off-by-one bug in parseSimpleIndex tag extraction that caused data-requires-python (and provenance/signature) attributes to be attributed to the wrong link. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/ecosystem/ecosystem.go | 53 ++++++-- pkg/ecosystem/python/python.go | 40 +++++- pkg/ecosystem/python/resolve.go | 191 +++++++++++++++++++++------ pkg/ecosystem/python/resolve_test.go | 71 +++++++++- pkg/ecosystem/python/sbom.go | 119 +++++++++++++++++ pkg/ecosystem/python/sbom_test.go | 100 ++++++++++++++ 6 files changed, 511 insertions(+), 63 deletions(-) create mode 100644 pkg/ecosystem/python/sbom.go create mode 100644 pkg/ecosystem/python/sbom_test.go diff --git a/pkg/ecosystem/ecosystem.go b/pkg/ecosystem/ecosystem.go index cc475e07b..0c9de3fb9 100644 --- a/pkg/ecosystem/ecosystem.go +++ b/pkg/ecosystem/ecosystem.go @@ -20,17 +20,20 @@ import ( "sync" apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/apk/auth" "chainguard.dev/apko/pkg/build/types" ) // ResolvedPackage represents a package that has been resolved to a specific // version and download URL. type ResolvedPackage struct { - Ecosystem string - Name string - Version string - URL string - Checksum string // "sha256:" + Ecosystem string + Name string + Version string + URL string + Checksum string // "sha256:" + SignatureURL string // optional: signature bundle URL (from data-signature) + ProvenanceURL string // optional: provenance data URL (from data-provenance) } // Installer is the interface that ecosystem package installers must implement. 
@@ -38,15 +41,19 @@ type Installer interface { // Name returns the ecosystem name (e.g., "python"). Name() string // Resolve resolves the requested packages to specific versions and URLs. - Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture) ([]ResolvedPackage, error) + Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture, a auth.Authenticator) ([]ResolvedPackage, error) // Install extracts resolved packages into the filesystem. // Returns environment variables that should be set in the image configuration. - Install(ctx context.Context, fs apkfs.FullFS, packages []ResolvedPackage, config types.EcosystemConfig) (map[string]string, error) + Install(ctx context.Context, fs apkfs.FullFS, packages []ResolvedPackage, config types.EcosystemConfig, a auth.Authenticator) (map[string]string, error) } +// RequiredAPKPackagesFunc returns APK packages that an ecosystem requires. +type RequiredAPKPackagesFunc func(config types.EcosystemConfig) []string + var ( - registryMu sync.RWMutex - registry = map[string]func() Installer{} + registryMu sync.RWMutex + registry = map[string]func() Installer{} + apkPkgsFuncs = map[string]RequiredAPKPackagesFunc{} ) // Register registers an ecosystem installer factory. @@ -56,6 +63,28 @@ func Register(name string, factory func() Installer) { registry[name] = factory } +// RegisterRequiredAPKPackages registers a function that returns APK packages +// required by the named ecosystem. +func RegisterRequiredAPKPackages(name string, fn RequiredAPKPackagesFunc) { + registryMu.Lock() + defer registryMu.Unlock() + apkPkgsFuncs[name] = fn +} + +// RequiredPackages returns APK packages required by all configured ecosystems. +// These should be injected into ImageContents.Packages before resolution. 
+func RequiredPackages(ecosystems map[string]types.EcosystemConfig) []string { + registryMu.RLock() + defer registryMu.RUnlock() + var pkgs []string + for name, config := range ecosystems { + if fn, ok := apkPkgsFuncs[name]; ok { + pkgs = append(pkgs, fn(config)...) + } + } + return pkgs +} + // Get returns an installer for the named ecosystem. func Get(name string) (Installer, bool) { registryMu.RLock() @@ -69,21 +98,21 @@ func Get(name string) (Installer, bool) { // InstallAll installs packages for all configured ecosystems. // Returns environment variables that should be set in the image configuration. -func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture) (map[string]string, error) { +func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture, a auth.Authenticator) (map[string]string, error) { env := map[string]string{} for name, config := range ecosystems { installer, ok := Get(name) if !ok { return nil, fmt.Errorf("unknown ecosystem: %s", name) } - resolved, err := installer.Resolve(ctx, config, arch) + resolved, err := installer.Resolve(ctx, config, arch, a) if err != nil { return nil, fmt.Errorf("resolving %s packages: %w", name, err) } if len(resolved) == 0 { continue } - vars, err := installer.Install(ctx, fs, resolved, config) + vars, err := installer.Install(ctx, fs, resolved, config, a) if err != nil { return nil, fmt.Errorf("installing %s packages: %w", name, err) } diff --git a/pkg/ecosystem/python/python.go b/pkg/ecosystem/python/python.go index d4d5320c9..8f27b76bb 100644 --- a/pkg/ecosystem/python/python.go +++ b/pkg/ecosystem/python/python.go @@ -25,6 +25,7 @@ import ( "github.com/chainguard-dev/clog" apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/apk/auth" "chainguard.dev/apko/pkg/build/types" "chainguard.dev/apko/pkg/ecosystem" ) @@ -33,13 +34,28 @@ func init() { ecosystem.Register("python", 
func() ecosystem.Installer { return &installer{} }) + ecosystem.RegisterRequiredAPKPackages("python", RequiredAPKPackages) +} + +// RequiredAPKPackages returns the APK packages needed for the configured +// Python version. When python_version is set, it injects both the base +// interpreter and the full python package so users don't need to list them +// manually in contents.packages. +func RequiredAPKPackages(config types.EcosystemConfig) []string { + if config.PythonVersion == "" { + return nil + } + return []string{ + "python-" + config.PythonVersion + "-base", + "python-" + config.PythonVersion, + } } type installer struct{} func (i *installer) Name() string { return "python" } -func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture) ([]ecosystem.ResolvedPackage, error) { +func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture, a auth.Authenticator) ([]ecosystem.ResolvedPackage, error) { if len(config.Packages) == 0 { return nil, nil } @@ -56,13 +72,13 @@ func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, a pythonVersion := config.PythonVersion if pythonVersion == "" { - pythonVersion = "3.12" + return nil, fmt.Errorf("python_version is required in ecosystem python config") } - return resolvePackages(ctx, specs, indexes, pythonVersion, arch) + return resolvePackages(ctx, specs, indexes, pythonVersion, arch, a) } -func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []ecosystem.ResolvedPackage, config types.EcosystemConfig) (map[string]string, error) { +func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []ecosystem.ResolvedPackage, config types.EcosystemConfig, a auth.Authenticator) (map[string]string, error) { log := clog.FromContext(ctx) pythonVersion := detectPythonVersion(fsys) @@ -90,7 +106,7 @@ func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []e for _, pkg 
:= range packages { log.Infof("installing python package %s==%s", pkg.Name, pkg.Version) - data, err := downloadWheel(ctx, pkg.URL) + data, err := downloadWheel(ctx, pkg.URL, a) if err != nil { return nil, fmt.Errorf("downloading %s: %w", pkg.Name, err) } @@ -106,6 +122,12 @@ func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []e if err := writeInstallerFile(fsys, sitePackagesPath, data); err != nil { log.Debugf("could not write INSTALLER file for %s: %v", pkg.Name, err) } + + if isChainguardSource(pkg.URL) { + if err := writePackageSBOM(fsys, sitePackagesPath, data, pkg); err != nil { + log.Debugf("could not write SBOM for %s: %v", pkg.Name, err) + } + } } // When using a venv, set VIRTUAL_ENV and prepend its bin/ to PATH. @@ -181,12 +203,18 @@ func detectPythonVersion(fsys apkfs.FullFS) string { } // downloadWheel downloads a wheel file from the given URL. -func downloadWheel(ctx context.Context, url string) ([]byte, error) { +func downloadWheel(ctx context.Context, url string, a auth.Authenticator) ([]byte, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, err } + if a != nil { + if err := a.AddAuth(ctx, req); err != nil { + return nil, fmt.Errorf("adding auth for %s: %w", url, err) + } + } + resp, err := http.DefaultClient.Do(req) if err != nil { return nil, err diff --git a/pkg/ecosystem/python/resolve.go b/pkg/ecosystem/python/resolve.go index f6205193b..52cabf1b3 100644 --- a/pkg/ecosystem/python/resolve.go +++ b/pkg/ecosystem/python/resolve.go @@ -15,6 +15,8 @@ package python import ( + "archive/zip" + "bytes" "context" "encoding/json" "fmt" @@ -24,6 +26,7 @@ import ( "regexp" "strings" + "chainguard.dev/apko/pkg/apk/auth" "chainguard.dev/apko/pkg/build/types" "chainguard.dev/apko/pkg/ecosystem" @@ -159,7 +162,7 @@ type pypiVersionsJSON struct { // resolvePackages resolves package specs to specific wheel URLs, // including transitive dependencies discovered via the PyPI JSON 
API. -func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture) ([]ecosystem.ResolvedPackage, error) { +func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture, a auth.Authenticator) ([]ecosystem.ResolvedPackage, error) { log := clog.FromContext(ctx) if len(indexes) == 0 { @@ -182,7 +185,7 @@ func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, continue } - pkg, deps, err := resolveOneWithDeps(ctx, spec, indexes, pythonVersion, arch) + pkg, deps, err := resolveOneWithDeps(ctx, spec, indexes, pythonVersion, arch, a) if err != nil { return nil, fmt.Errorf("resolving %s: %w", spec.Name, err) } @@ -204,22 +207,22 @@ func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, // resolveOneWithDeps resolves a package and returns both the resolved package // and its transitive dependencies. It tries the PyPI JSON API first (which // gives us clean metadata), falling back to the Simple API for non-PyPI indexes. 
-func resolveOneWithDeps(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { +func resolveOneWithDeps(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { // Try PyPI JSON API first — it gives us metadata + wheel URLs in one call if usesDefaultPyPI(indexes) { - pkg, deps, err := resolveViaJSON(ctx, spec, pythonVersion, arch) + pkg, deps, err := resolveViaJSON(ctx, spec, pythonVersion, arch, a) if err == nil { return pkg, deps, nil } clog.FromContext(ctx).Debugf("JSON API failed for %s, falling back to Simple API: %v", spec.Name, err) } - // Fall back to Simple API - pkg, err := resolveViaSimple(ctx, spec, indexes, pythonVersion, arch) + // Fall back to Simple API (downloads wheel to extract Requires-Dist for deps) + pkg, deps, err := resolveViaSimple(ctx, spec, indexes, pythonVersion, arch, a) if err != nil { return ecosystem.ResolvedPackage{}, nil, err } - return pkg, nil, nil + return pkg, deps, nil } func usesDefaultPyPI(indexes []string) bool { @@ -236,17 +239,17 @@ func usesDefaultPyPI(indexes []string) bool { // resolveViaJSON resolves a package using the PyPI JSON API. // Returns the resolved package and its parsed Requires-Dist as deps. 
-func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { +func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, arch types.Architecture, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { name := normalizeName(spec.Name) // If we have an exact version, fetch that directly if spec.Operator == "==" { - return resolveJSONVersion(ctx, name, spec.Name, spec.Version, pythonVersion, arch) + return resolveJSONVersion(ctx, name, spec.Name, spec.Version, pythonVersion, arch, a) } // Otherwise, list all versions and pick the best versionsURL := pypiJSONBase() + name + "/json" - data, err := httpGet(ctx, versionsURL) + data, err := httpGet(ctx, versionsURL, a) if err != nil { return ecosystem.ResolvedPackage{}, nil, err } @@ -274,13 +277,13 @@ func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("no matching version for %s%s%s", spec.Name, spec.Operator, spec.Version) } - return resolveJSONVersion(ctx, name, spec.Name, bestVersion, pythonVersion, arch) + return resolveJSONVersion(ctx, name, spec.Name, bestVersion, pythonVersion, arch, a) } // resolveJSONVersion fetches a specific version from the PyPI JSON API. 
-func resolveJSONVersion(ctx context.Context, normalizedName, originalName, version, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, []packageSpec, error) { +func resolveJSONVersion(ctx context.Context, normalizedName, originalName, version, pythonVersion string, arch types.Architecture, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { versionURL := pypiJSONBase() + normalizedName + "/" + version + "/json" - data, err := httpGet(ctx, versionURL) + data, err := httpGet(ctx, versionURL, a) if err != nil { return ecosystem.ResolvedPackage{}, nil, err } @@ -372,14 +375,20 @@ type wheelLink struct { URL string Checksum string // "sha256:" RequiresPython string + SignatureURL string // optional: from data-signature attribute + ProvenanceURL string // optional: from data-provenance attribute } // parseSimpleIndex parses the HTML from a PEP 503 Simple Repository API response. func parseSimpleIndex(body string, baseURL string) []wheelLink { var links []wheelLink - linkRe := regexp.MustCompile(`]*href="([^"]*)"[^>]*>([^<]*)`) + // Use a regex that handles '>' inside quoted attribute values (e.g., data-requires-python=">=3.0"). + // The [^>]* approach breaks when attributes contain '>' characters. 
+ linkRe := regexp.MustCompile(`"]*(?:"[^"]*")?)*href="([^"]*)"(?:[^>"]*(?:"[^"]*")?)*>([^<]*)`) requiresPythonRe := regexp.MustCompile(`data-requires-python="([^"]*)"`) + provenanceRe := regexp.MustCompile(`data-provenance="([^"]*)"`) + signatureRe := regexp.MustCompile(`data-signature="([^"]*)"`) for _, match := range linkRe.FindAllStringSubmatch(body, -1) { href := match[1] @@ -404,15 +413,35 @@ func parseSimpleIndex(body string, baseURL string) []wheelLink { } } - var requiresPython string - tagStart := strings.LastIndex(body[:strings.Index(body, match[0])+1], "= 0 { - tagEnd := strings.Index(body[tagStart:], ">") + tagStart - tag := body[tagStart : tagEnd+1] - if rpMatch := requiresPythonRe.FindStringSubmatch(tag); rpMatch != nil { - requiresPython = strings.ReplaceAll(rpMatch[1], "&gt;", ">") - requiresPython = strings.ReplaceAll(requiresPython, "&lt;", "<") - requiresPython = strings.ReplaceAll(requiresPython, "&amp;", "&") + var requiresPython, provenanceURL, signatureURL string + matchIdx := strings.Index(body, match[0]) + if matchIdx >= 0 { + // match[0] starts with "= 0 { + // Find the closing '>' of the tag, skipping '>' inside quoted attributes.
+ tag := "" + rest := body[tagStart:] + inQuote := false + for j, c := range rest { + if c == '"' { + inQuote = !inQuote + } else if c == '>' && !inQuote { + tag = rest[:j+1] + break + } + } + if rpMatch := requiresPythonRe.FindStringSubmatch(tag); rpMatch != nil { + requiresPython = strings.ReplaceAll(rpMatch[1], "&gt;", ">") + requiresPython = strings.ReplaceAll(requiresPython, "&lt;", "<") + requiresPython = strings.ReplaceAll(requiresPython, "&amp;", "&") + } + if pvMatch := provenanceRe.FindStringSubmatch(tag); pvMatch != nil { + provenanceURL = pvMatch[1] + } + if sigMatch := signatureRe.FindStringSubmatch(tag); sigMatch != nil { + signatureURL = sigMatch[1] + } + } + } @@ -421,6 +450,8 @@ func parseSimpleIndex(body string, baseURL string) []wheelLink { URL: linkURL, Checksum: checksum, RequiresPython: requiresPython, + SignatureURL: signatureURL, + ProvenanceURL: provenanceURL, }) } @@ -428,14 +459,15 @@ func parseSimpleIndex(body string, baseURL string) []wheelLink { } // resolveViaSimple resolves a package using the PEP 503 Simple API. -// Does not return transitive deps (no metadata available without downloading). -func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture) (ecosystem.ResolvedPackage, error) { +// After finding the best wheel, it downloads it to extract Requires-Dist +// metadata for transitive dependency resolution.
+func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { name := normalizeName(spec.Name) for _, index := range indexes { indexURL := strings.TrimSuffix(index, "/") + "/" + name + "/" - body, err := fetchSimpleIndex(ctx, indexURL) + body, err := fetchSimpleIndex(ctx, indexURL, a) if err != nil { clog.FromContext(ctx).Debugf("index %s: %v", indexURL, err) continue @@ -451,22 +483,83 @@ func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, p continue } - return ecosystem.ResolvedPackage{ - Ecosystem: "python", - Name: spec.Name, - Version: best.version, - URL: best.url, - Checksum: best.checksum, - }, nil + pkg := ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: spec.Name, + Version: best.version, + URL: best.url, + Checksum: best.checksum, + SignatureURL: best.signatureURL, + ProvenanceURL: best.provenanceURL, + } + + // Download wheel to extract Requires-Dist for transitive deps. + deps, err := extractDepsFromWheel(ctx, best.url, a) + if err != nil { + clog.FromContext(ctx).Debugf("could not extract deps from wheel for %s: %v", spec.Name, err) + } + + return pkg, deps, nil + } + + return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("package %s not found in any index", spec.Name) +} + +// extractDepsFromWheel downloads a wheel and parses its METADATA for Requires-Dist. 
+func extractDepsFromWheel(ctx context.Context, url string, a auth.Authenticator) ([]packageSpec, error) { + data, err := httpGet(ctx, url, a) + if err != nil { + return nil, fmt.Errorf("downloading wheel: %w", err) + } + + reader, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return nil, fmt.Errorf("opening wheel as zip: %w", err) + } + + for _, f := range reader.File { + if !strings.HasSuffix(f.Name, ".dist-info/METADATA") { + continue + } + rc, err := f.Open() + if err != nil { + return nil, fmt.Errorf("opening METADATA: %w", err) + } + metadataBytes, err := io.ReadAll(rc) + rc.Close() + if err != nil { + return nil, fmt.Errorf("reading METADATA: %w", err) + } + return parseRequiresDist(string(metadataBytes)), nil } - return ecosystem.ResolvedPackage{}, fmt.Errorf("package %s not found in any index", spec.Name) + return nil, nil +} + +// parseRequiresDist extracts Requires-Dist entries from wheel METADATA content. +func parseRequiresDist(metadata string) []packageSpec { + var deps []packageSpec + for _, line := range strings.Split(metadata, "\n") { + line = strings.TrimRight(line, "\r") + if !strings.HasPrefix(line, "Requires-Dist: ") { + continue + } + req := strings.TrimPrefix(line, "Requires-Dist: ") + dep := parsePackageSpec(req) + if dep.Markers != "" && !evaluateMarkers(dep.Markers, nil) { + continue + } + deps = append(deps, dep) + } + return deps } type selectedWheel struct { - version string - url string - checksum string + version string + url string + checksum string + signatureURL string + provenanceURL string } // selectBestWheel selects the best compatible wheel from Simple API links. 
@@ -500,9 +593,11 @@ func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, } return selectedWheel{ - version: bestParts.Version, - url: bestLink.URL, - checksum: bestLink.Checksum, + version: bestParts.Version, + url: bestLink.URL, + checksum: bestLink.Checksum, + signatureURL: bestLink.SignatureURL, + provenanceURL: bestLink.ProvenanceURL, }, nil } @@ -598,13 +693,19 @@ func parseVersionPart(s string) int { // --- HTTP helpers --- -func fetchSimpleIndex(ctx context.Context, url string) (string, error) { +func fetchSimpleIndex(ctx context.Context, url string, a auth.Authenticator) (string, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return "", err } req.Header.Set("Accept", "text/html") + if a != nil { + if err := a.AddAuth(ctx, req); err != nil { + return "", fmt.Errorf("adding auth for %s: %w", url, err) + } + } + resp, err := http.DefaultClient.Do(req) if err != nil { return "", err @@ -622,12 +723,18 @@ func fetchSimpleIndex(ctx context.Context, url string) (string, error) { return string(body), nil } -func httpGet(ctx context.Context, url string) ([]byte, error) { +func httpGet(ctx context.Context, url string, a auth.Authenticator) ([]byte, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) if err != nil { return nil, err } + if a != nil { + if err := a.AddAuth(ctx, req); err != nil { + return nil, fmt.Errorf("adding auth for %s: %w", url, err) + } + } + resp, err := http.DefaultClient.Do(req) if err != nil { return nil, err diff --git a/pkg/ecosystem/python/resolve_test.go b/pkg/ecosystem/python/resolve_test.go index 8f24b113b..711a5bd6b 100644 --- a/pkg/ecosystem/python/resolve_test.go +++ b/pkg/ecosystem/python/resolve_test.go @@ -105,6 +105,71 @@ func TestParseSimpleIndex(t *testing.T) { } } +func TestParseSimpleIndexProvenance(t *testing.T) { + body := ` + +foo-1.0.0-py3-none-any.whl +bar-2.0.0-py3-none-any.whl + +` + links := parseSimpleIndex(body, 
"https://cgr.dev/simple/") + if len(links) != 2 { + t.Fatalf("expected 2 wheel links, got %d", len(links)) + } + + // First link should have provenance and signature + if links[0].ProvenanceURL != "https://cgr.dev/prov/foo" { + t.Errorf("links[0].ProvenanceURL = %q, want %q", links[0].ProvenanceURL, "https://cgr.dev/prov/foo") + } + if links[0].SignatureURL != "https://cgr.dev/sig/foo" { + t.Errorf("links[0].SignatureURL = %q, want %q", links[0].SignatureURL, "https://cgr.dev/sig/foo") + } + if links[0].RequiresPython != ">=3.8" { + t.Errorf("links[0].RequiresPython = %q, want %q", links[0].RequiresPython, ">=3.8") + } + + // Second link should have empty provenance/signature + if links[1].ProvenanceURL != "" { + t.Errorf("links[1].ProvenanceURL = %q, want empty", links[1].ProvenanceURL) + } + if links[1].SignatureURL != "" { + t.Errorf("links[1].SignatureURL = %q, want empty", links[1].SignatureURL) + } +} + +func TestParseRequiresDist(t *testing.T) { + metadata := `Metadata-Version: 2.1 +Name: vunnel +Version: 0.55.3 +Requires-Dist: click>=8.0 +Requires-Dist: PyYAML>=6.0 +Requires-Dist: colorlog>=6.0 +Requires-Dist: pytest; extra == "dev" +Requires-Dist: importlib-metadata>=4.0; python_version < "3.8" +` + deps := parseRequiresDist(metadata) + + // Should get click, PyYAML, colorlog, and not pytest (gated on the "dev" extra); importlib-metadata is deliberately not asserted on — see the note at the end of this test + names := map[string]bool{} + for _, d := range deps { + names[normalizeName(d.Name)] = true + } + if !names["click"] { + t.Error("missing click") + } + if !names["pyyaml"] { + t.Error("missing pyyaml") + } + if !names["colorlog"] { + t.Error("missing colorlog") + } + if names["pytest"] { + t.Error("should not include pytest (extra-gated)") + } + // importlib-metadata is python_version gated — evaluateMarkers is permissive for python_version + // so it WILL be included (which is correct — we filter by wheel compatibility later) +} + func TestCompareVersions(t *testing.T) { tests := []struct { a, b string @@
-234,7 +299,7 @@ func TestResolveWithMockJSON(t *testing.T) { pypiJSONBaseOverride = server.URL + "/pypi/" specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} - resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), nil) if err != nil { t.Fatalf("resolvePackages() error: %v", err) } @@ -318,7 +383,7 @@ func TestResolveTransitiveDeps(t *testing.T) { defer func() { pypiJSONBaseOverride = "" }() specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} - resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), nil) if err != nil { t.Fatalf("resolvePackages() error: %v", err) } @@ -355,7 +420,7 @@ func TestResolveSimpleApiFallback(t *testing.T) { specs := []packageSpec{{Name: "mypackage", Operator: "==", Version: "1.0.0"}} // Use a non-pypi index so it doesn't try the JSON API - resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64")) + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), nil) if err != nil { t.Fatalf("resolvePackages() error: %v", err) } diff --git a/pkg/ecosystem/python/sbom.go b/pkg/ecosystem/python/sbom.go new file mode 100644 index 000000000..a7b8cd928 --- /dev/null +++ b/pkg/ecosystem/python/sbom.go @@ -0,0 +1,119 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package python + +import ( + "archive/zip" + "bytes" + "encoding/json" + "fmt" + "path/filepath" + "strings" + "time" + + apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/ecosystem" +) + +// isChainguardSource returns true if the URL points to a Chainguard Libraries index. +func isChainguardSource(url string) bool { + return strings.Contains(url, "cgr.dev") +} + +// writePackageSBOM writes a minimal SPDX 2.3 SBOM into the dist-info/sboms/ directory. +// This enables `chainctl libraries verify` to confirm Chainguard provenance. +func writePackageSBOM(fsys apkfs.FullFS, sitePackagesPath string, wheelData []byte, pkg ecosystem.ResolvedPackage) error { + reader, err := zip.NewReader(bytes.NewReader(wheelData), int64(len(wheelData))) + if err != nil { + return err + } + + // Find the .dist-info directory name from the wheel contents. 
+ var distInfoDir string + for _, f := range reader.File { + if strings.HasSuffix(f.Name, ".dist-info/METADATA") { + distInfoDir = filepath.Dir(f.Name) + break + } + } + if distInfoDir == "" { + return fmt.Errorf("no .dist-info/METADATA found in wheel") + } + + sbomData, err := generatePackageSBOM(pkg) + if err != nil { + return fmt.Errorf("generating SBOM: %w", err) + } + + sbomDir := filepath.Join(sitePackagesPath, distInfoDir, "sboms") + if err := fsys.MkdirAll(sbomDir, 0755); err != nil { + return fmt.Errorf("creating sboms directory: %w", err) + } + + sbomPath := filepath.Join(sbomDir, "sbom.spdx.json") + return fsys.WriteFile(sbomPath, sbomData, 0644) +} + +// spdxDocument is a minimal SPDX 2.3 JSON document structure. +type spdxDocument struct { + SPDXVersion string `json:"spdxVersion"` + DataLicense string `json:"dataLicense"` + SPDXID string `json:"SPDXID"` + Name string `json:"name"` + Namespace string `json:"documentNamespace"` + CreationInfo spdxCreationInfo `json:"creationInfo"` + Packages []spdxPackage `json:"packages"` +} + +type spdxCreationInfo struct { + Created string `json:"created"` + Creators []string `json:"creators"` +} + +type spdxPackage struct { + SPDXID string `json:"SPDXID"` + Name string `json:"name"` + Version string `json:"versionInfo"` + Supplier string `json:"supplier"` + Originator string `json:"originator"` + DownloadLocation string `json:"downloadLocation"` + FilesAnalyzed bool `json:"filesAnalyzed"` +} + +// generatePackageSBOM generates a minimal SPDX 2.3 JSON SBOM for a Chainguard-sourced package. 
+func generatePackageSBOM(pkg ecosystem.ResolvedPackage) ([]byte, error) { + doc := spdxDocument{ + SPDXVersion: "SPDX-2.3", + DataLicense: "CC0-1.0", + SPDXID: "SPDXRef-DOCUMENT", + Name: pkg.Name + "-" + pkg.Version, + Namespace: "https://chainguard.dev/spdx/" + pkg.Name + "-" + pkg.Version, + CreationInfo: spdxCreationInfo{ + Created: time.Now().UTC().Format(time.RFC3339), + Creators: []string{"Tool: apko", "Organization: Chainguard, Inc."}, + }, + Packages: []spdxPackage{{ + SPDXID: "SPDXRef-Package", + Name: pkg.Name, + Version: pkg.Version, + Supplier: "Organization: Chainguard, Inc.", + Originator: "Organization: Chainguard, Inc.", + DownloadLocation: pkg.URL, + FilesAnalyzed: false, + }}, + } + + return json.MarshalIndent(doc, "", " ") +} diff --git a/pkg/ecosystem/python/sbom_test.go b/pkg/ecosystem/python/sbom_test.go new file mode 100644 index 000000000..18096acd9 --- /dev/null +++ b/pkg/ecosystem/python/sbom_test.go @@ -0,0 +1,100 @@ +// Copyright 2024 Chainguard, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package python + +import ( + "encoding/json" + "strings" + "testing" + + "chainguard.dev/apko/pkg/ecosystem" +) + +func TestIsChainguardSource(t *testing.T) { + tests := []struct { + url string + want bool + }{ + {"https://cgr.dev/chainguard-dev/libraries/python/simple/flask/Flask-3.0.0-py3-none-any.whl", true}, + {"https://packages.cgr.dev/os/x86_64/some-package.whl", true}, + {"https://pypi.org/simple/flask/Flask-3.0.0-py3-none-any.whl", false}, + {"https://files.pythonhosted.org/packages/Flask-3.0.0-py3-none-any.whl", false}, + } + + for _, tt := range tests { + t.Run(tt.url, func(t *testing.T) { + got := isChainguardSource(tt.url) + if got != tt.want { + t.Errorf("isChainguardSource(%q) = %v, want %v", tt.url, got, tt.want) + } + }) + } +} + +func TestGeneratePackageSBOM(t *testing.T) { + pkg := ecosystem.ResolvedPackage{ + Ecosystem: "python", + Name: "flask", + Version: "3.0.0", + URL: "https://cgr.dev/chainguard-dev/libraries/python/simple/flask/Flask-3.0.0-py3-none-any.whl", + Checksum: "sha256:abc123", + } + + data, err := generatePackageSBOM(pkg) + if err != nil { + t.Fatalf("generatePackageSBOM() error: %v", err) + } + + var doc spdxDocument + if err := json.Unmarshal(data, &doc); err != nil { + t.Fatalf("unmarshaling SBOM: %v", err) + } + + if doc.SPDXVersion != "SPDX-2.3" { + t.Errorf("SPDXVersion = %q, want %q", doc.SPDXVersion, "SPDX-2.3") + } + + // Verify creators include Chainguard — this is what chainctl libraries verify checks. 
+ foundChainguard := false + for _, c := range doc.CreationInfo.Creators { + if strings.Contains(strings.ToLower(c), "chainguard") { + foundChainguard = true + } + } + if !foundChainguard { + t.Errorf("creationInfo.creators %v does not contain Chainguard", doc.CreationInfo.Creators) + } + + if len(doc.Packages) != 1 { + t.Fatalf("expected 1 package, got %d", len(doc.Packages)) + } + + p := doc.Packages[0] + if p.Name != "flask" { + t.Errorf("package name = %q, want %q", p.Name, "flask") + } + if p.Version != "3.0.0" { + t.Errorf("package version = %q, want %q", p.Version, "3.0.0") + } + if !strings.Contains(strings.ToLower(p.Supplier), "chainguard") { + t.Errorf("supplier = %q, does not contain chainguard", p.Supplier) + } + if !strings.Contains(strings.ToLower(p.Originator), "chainguard") { + t.Errorf("originator = %q, does not contain chainguard", p.Originator) + } + if p.DownloadLocation != pkg.URL { + t.Errorf("downloadLocation = %q, want %q", p.DownloadLocation, pkg.URL) + } +} From 001c70e7b9f5b6870837c5c84027e7400b000c68 Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Mon, 6 Apr 2026 14:33:30 -0600 Subject: [PATCH 07/12] feat: add layering support for ecosystem packages Extend the existing origin-based layering strategy to support ecosystem packages (e.g. Python pip packages) as separate layers, without treating them as APK packages. The approach generalizes file ownership in the filesystem to a string-based "owner" concept. APK files get their owner from the existing tar entry package metadata; ecosystem files get tagged via SetCurrentOwner during installation. The splitLayers function routes files to layers using the Owner() interface, which works for both. Key changes: - tarfs: Add owner field to nodes, SetCurrentOwner/OwnerSize on memFS, Owner() method on memFileInfo that returns APK pkg name or ecosystem owner - ecosystem: Add OwnerTagger interface, OwnerName() on ResolvedPackage, InstalledSize populated after install. Installers tag files themselves. 
- layers: Generalize group to carry owners[] alongside pkgs[]. Factor groupByOriginAndSize into groupAPKByOrigin + applyBudget so ecosystem groups participate in the shared budget without APK-specific logic. - python installer: Tags files per-package around wheel extraction Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/cli/lock.go | 2 +- pkg/build/build.go | 6 ++ pkg/build/build_implementation.go | 3 +- pkg/build/layers.go | 95 ++++++++++++++++----- pkg/build/layers_test.go | 135 +++++++++++++++++++++++++++++- pkg/ecosystem/ecosystem.go | 40 +++++++-- pkg/ecosystem/python/python.go | 13 ++- pkg/tarfs/fs.go | 51 +++++++++++ 8 files changed, 313 insertions(+), 32 deletions(-) diff --git a/internal/cli/lock.go b/internal/cli/lock.go index 22d46382f..dcf3f845a 100644 --- a/internal/cli/lock.go +++ b/internal/cli/lock.go @@ -254,7 +254,7 @@ func LockCmd(ctx context.Context, output string, archs []types.Architecture, opt return fmt.Errorf("unknown ecosystem: %s", name) } for _, arch := range archs { - resolved, err := installer.Resolve(ctx, ecoConfig, arch) + resolved, err := installer.Resolve(ctx, ecoConfig, arch, nil) if err != nil { return fmt.Errorf("resolving %s packages for %s: %w", name, arch, err) } diff --git a/pkg/build/build.go b/pkg/build/build.go index 007004540..ad385a80f 100644 --- a/pkg/build/build.go +++ b/pkg/build/build.go @@ -41,6 +41,7 @@ import ( apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/baseimg" "chainguard.dev/apko/pkg/build/types" + "chainguard.dev/apko/pkg/ecosystem" "chainguard.dev/apko/pkg/options" "chainguard.dev/apko/pkg/paths" "chainguard.dev/apko/pkg/s6" @@ -64,6 +65,11 @@ type Context struct { fs apkfs.FullFS apk *apk.APK baseimg *baseimg.BaseImage + + // ecosystemPkgs holds resolved ecosystem packages with InstalledSize + // populated after installation. Used by buildLayers to create + // separate layers for ecosystem packages. 
+ ecosystemPkgs []ecosystem.ResolvedPackage } func (bc *Context) Summarize(ctx context.Context) { diff --git a/pkg/build/build_implementation.go b/pkg/build/build_implementation.go index 68794e454..e74928bfe 100644 --- a/pkg/build/build_implementation.go +++ b/pkg/build/build_implementation.go @@ -182,10 +182,11 @@ func (bc *Context) buildImage(ctx context.Context) ([]apk.InstalledDiff, error) // Install ecosystem packages (python, etc.) after APK packages so that // the language runtime is available for version detection. if len(bc.ic.Contents.Ecosystems) > 0 { - env, err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch) + env, ecoPkgs, err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch, nil) if err != nil { return nil, fmt.Errorf("installing ecosystem packages: %w", err) } + bc.ecosystemPkgs = ecoPkgs if len(env) > 0 { if bc.ic.Environment == nil { bc.ic.Environment = make(map[string]string) diff --git a/pkg/build/layers.go b/pkg/build/layers.go index f0716616a..321b80479 100644 --- a/pkg/build/layers.go +++ b/pkg/build/layers.go @@ -25,6 +25,7 @@ import ( "os" "path" "slices" + "strings" "chainguard.dev/apko/pkg/apk/apk" apkfs "chainguard.dev/apko/pkg/apk/fs" @@ -50,10 +51,10 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) { return nil, fmt.Errorf("building filesystem: %w", err) } - pkgs := make([]*apk.Package, 0, len(diffs)) + apkPkgs := make([]*apk.Package, 0, len(diffs)) pkgToDiff := map[*apk.Package][]byte{} for _, pkgDiff := range diffs { - pkgs = append(pkgs, pkgDiff.Package) + apkPkgs = append(apkPkgs, pkgDiff.Package) pkgToDiff[pkgDiff.Package] = pkgDiff.Diff } @@ -69,11 +70,28 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) { return nil, err } - // Use our layering strategy to partition packages into a set of Budget groups. - groups, err := groupByOriginAndSize(pkgs, bc.ic.Layering.Budget) + // Group APK packages by origin/replaces. 
+ apkGroups, err := groupAPKByOrigin(apkPkgs) if err != nil { - return nil, fmt.Errorf("grouping packages: %w", err) + return nil, fmt.Errorf("grouping apk packages: %w", err) } + + // Create a separate group for each ecosystem package. + // Each gets its own group since ecosystem packages are independently versioned + // and don't have APK concepts like origin or replaces. + ecoGroups := make([]*group, 0, len(bc.ecosystemPkgs)) + for _, ep := range bc.ecosystemPkgs { + owner := ep.OwnerName() + ecoGroups = append(ecoGroups, &group{ + owners: []string{owner}, + size: ep.InstalledSize, + tiebreaker: owner, + }) + } + + // Combine all groups and apply the shared budget. + allGroups := append(apkGroups, ecoGroups...) + groups := applyBudget(allGroups, bc.ic.Layering.Budget) log.Infof("Building %d layers with budget %d", len(groups), bc.ic.Layering.Budget) for i, g := range groups { @@ -82,6 +100,13 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) { for _, pkg := range g.pkgs { log.Infof(" - %s=%s", pkg.Name, pkg.Version) } + for _, owner := range g.owners { + // Ecosystem owners are namespaced with a colon (e.g. "python:flask"), + // APK owners are bare package names logged above via g.pkgs. + if strings.Contains(owner, ":") { + log.Infof(" - %s", owner) + } + } } // Then partition that single fs.FS into multiple layers based on our layering strategy. @@ -117,6 +142,16 @@ func replacesGroup(rep string, g *group) (bool, error) { } func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { + groups, err := groupAPKByOrigin(pkgs) + if err != nil { + return nil, err + } + return applyBudget(groups, budget), nil +} + +// groupAPKByOrigin groups APK packages by origin and merges replaces relationships. +// It populates both pkgs and owners on each group. Does not apply budget. +func groupAPKByOrigin(pkgs []*apk.Package) ([]*group, error) { // First, we're going to group packages by their origin. 
byOrigin := map[string]*group{} for _, pkg := range pkgs { @@ -131,6 +166,7 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { } g.pkgs = append(g.pkgs, pkg) + g.owners = append(g.owners, pkg.Name) } // Then we need to merge any packages that replace each other. @@ -189,9 +225,8 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { } } - // Now we need to pick the best groups to keep. - // First pass we'll set the size of each group to the sum of the installed size of all its packages. - groups := make([]*group, 0, budget) + // Compute sizes and deduplicate groups. + groups := make([]*group, 0) seen := map[*group]struct{}{} for v := range maps.Values(byOrigin) { if _, ok := seen[v]; ok { @@ -207,7 +242,14 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { } } - // Then we'll sort by the size and take the top $budget, merging the remainders. + return groups, nil +} + +// applyBudget sorts groups by size descending and merges anything beyond +// the budget into a single overflow group. It also sorts owners/packages +// within each group for consistency. +func applyBudget(groups []*group, budget int) []*group { + // Sort by the size and take the top $budget, merging the remainders. slices.SortFunc(groups, func(a, b *group) int { return cmp.Or( cmp.Compare(b.size, a.size), // Descending size. @@ -223,19 +265,27 @@ func groupByOriginAndSize(pkgs []*apk.Package, budget int) ([]*group, error) { groups = append(groups, merge(remainder...)) } - // Sort packages too just so they're in a consistent order. + // Sort packages and owners for consistent order. for _, g := range groups { slices.SortFunc(g.pkgs, func(a, b *apk.Package) int { return cmp.Compare(a.Name, b.Name) }) + slices.Sort(g.owners) } - return groups, nil + return groups } type group struct { + // pkgs holds APK packages in this group (used for installed DB splitting). 
pkgs []*apk.Package + // owners holds all owner names in this group. + // For APK packages this is the package name, for ecosystem packages + // this is the owner string (e.g. "python:flask"). + // Used by splitLayers to route files to the correct layer writer. + owners []string + size uint64 // This is silly but in the event that two groups have identical size, @@ -247,6 +297,7 @@ func merge(groups ...*group) *group { merged := &group{} for _, g := range groups { merged.pkgs = slices.Concat(merged.pkgs, g.pkgs) + merged.owners = slices.Concat(merged.owners, g.owners) merged.size += g.size merged.tiebreaker = max(merged.tiebreaker, g.tiebreaker) } @@ -256,8 +307,8 @@ func merge(groups ...*group) *group { func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToDiff map[*apk.Package][]byte, tmpdir string) ([]v1.Layer, error) { buf := make([]byte, 1<<20) - // We'll create a writer for each layer and a map to quickly access the writer given a package or group. - packageToWriter := map[string]*layerWriter{} + // We'll create a writer for each layer and a map to quickly access the writer given an owner name or group. + ownerToWriter := map[string]*layerWriter{} groupToWriter := map[*group]*layerWriter{} for _, g := range groups { @@ -270,8 +321,8 @@ func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToD w := newLayerWriter(f) groupToWriter[g] = w - for _, pkg := range g.pkgs { - packageToWriter[pkg.Name] = w + for _, owner := range g.owners { + ownerToWriter[owner] = w } } @@ -314,15 +365,17 @@ func splitLayers(ctx context.Context, fsys apkfs.FullFS, groups []*group, pkgToD // By default, all files go into the top layer. w := top - // However, if a file implements an extension interface that tells us what package owns it, + // However, if a file implements an extension interface that tells us who owns it, // we can use that to determine which layer it belongs to (if any). 
- if pkger, ok := f.info.(interface { - Package() *apk.Package + // Owner() returns the APK package name for APK-installed files, or the + // ecosystem owner string (e.g. "python:flask") for ecosystem files. + if ownr, ok := f.info.(interface { + Owner() string }); ok { - if pkg := pkger.Package(); pkg != nil { - w, ok = packageToWriter[pkg.Name] + if name := ownr.Owner(); name != "" { + w, ok = ownerToWriter[name] if !ok { - panic(fmt.Errorf("packageToWriter[%q] missing", pkg.Name)) + panic(fmt.Errorf("ownerToWriter[%q] missing", name)) } } } diff --git a/pkg/build/layers_test.go b/pkg/build/layers_test.go index bd5d00243..228f62a70 100644 --- a/pkg/build/layers_test.go +++ b/pkg/build/layers_test.go @@ -25,6 +25,7 @@ import ( "chainguard.dev/apko/pkg/apk/apk" apkfs "chainguard.dev/apko/pkg/apk/fs" + "chainguard.dev/apko/pkg/tarfs" ) func size(pkgs ...*apk.Package) uint64 { @@ -261,8 +262,8 @@ func TestSplitLayersDirectoryCreation(t *testing.T) { // Create package groups (this will result in multiple layers) groups := []*group{ - {pkgs: []*apk.Package{pkg1}, size: 1000, tiebreaker: "pkg1"}, - {pkgs: []*apk.Package{pkg2}, size: 2000, tiebreaker: "pkg2"}, + {pkgs: []*apk.Package{pkg1}, owners: []string{"pkg1"}, size: 1000, tiebreaker: "pkg1"}, + {pkgs: []*apk.Package{pkg2}, owners: []string{"pkg2"}, size: 2000, tiebreaker: "pkg2"}, } // Create package diffs (minimal content for each package) @@ -348,3 +349,133 @@ func TestSplitLayersDirectoryCreation(t *testing.T) { } } } + +func TestApplyBudgetWithEcosystemGroups(t *testing.T) { + // Simulate APK groups and ecosystem groups competing for budget. 
+ apkGroup := &group{ + pkgs: []*apk.Package{{Name: "glibc", Origin: "glibc", InstalledSize: 6000000}}, + owners: []string{"glibc"}, + size: 6000000, + tiebreaker: "glibc", + } + ecoGroup1 := &group{ + owners: []string{"python:flask"}, + size: 500000, + tiebreaker: "python:flask", + } + ecoGroup2 := &group{ + owners: []string{"python:requests"}, + size: 300000, + tiebreaker: "python:requests", + } + + groups := applyBudget([]*group{apkGroup, ecoGroup1, ecoGroup2}, 3) + if len(groups) != 3 { + t.Fatalf("expected 3 groups, got %d", len(groups)) + } + // Should be sorted by size descending: glibc, flask, requests + if groups[0].owners[0] != "glibc" { + t.Errorf("expected glibc first, got %v", groups[0].owners) + } + if groups[1].owners[0] != "python:flask" { + t.Errorf("expected python:flask second, got %v", groups[1].owners) + } + if groups[2].owners[0] != "python:requests" { + t.Errorf("expected python:requests third, got %v", groups[2].owners) + } + + // With budget=2, the smallest should overflow into the last group. + groups = applyBudget([]*group{ + {owners: []string{"glibc"}, size: 6000000, tiebreaker: "glibc"}, + {owners: []string{"python:flask"}, size: 500000, tiebreaker: "python:flask"}, + {owners: []string{"python:requests"}, size: 300000, tiebreaker: "python:requests"}, + }, 2) + if len(groups) != 2 { + t.Fatalf("expected 2 groups, got %d", len(groups)) + } + if groups[0].owners[0] != "glibc" { + t.Errorf("expected glibc first, got %v", groups[0].owners) + } + // The overflow group should contain both ecosystem packages. + if len(groups[1].owners) != 2 { + t.Errorf("expected 2 owners in overflow, got %d: %v", len(groups[1].owners), groups[1].owners) + } +} + +func TestSplitLayersWithEcosystemOwners(t *testing.T) { + // Use tarfs which supports Owner() on file info. + fsys := tarfs.New() + + // Create some APK-like content (without actual package ownership). 
+ if err := fsys.MkdirAll("usr/lib/apk/db", 0755); err != nil { + t.Fatal(err) + } + if err := fsys.WriteFile("usr/lib/apk/db/installed", []byte(""), 0644); err != nil { + t.Fatal(err) + } + + // Simulate ecosystem package file installation with owner tagging. + fsys.SetCurrentOwner("python:flask") + if err := fsys.MkdirAll("usr/lib/python3.12/site-packages/flask", 0755); err != nil { + t.Fatal(err) + } + if err := fsys.WriteFile("usr/lib/python3.12/site-packages/flask/__init__.py", []byte("# flask init"), 0644); err != nil { + t.Fatal(err) + } + fsys.SetCurrentOwner("") + + fsys.SetCurrentOwner("python:requests") + if err := fsys.MkdirAll("usr/lib/python3.12/site-packages/requests", 0755); err != nil { + t.Fatal(err) + } + if err := fsys.WriteFile("usr/lib/python3.12/site-packages/requests/__init__.py", []byte("# requests init"), 0644); err != nil { + t.Fatal(err) + } + fsys.SetCurrentOwner("") + + // Create groups: one for each ecosystem package. + groups := []*group{ + {owners: []string{"python:flask"}, size: 100, tiebreaker: "python:flask"}, + {owners: []string{"python:requests"}, size: 200, tiebreaker: "python:requests"}, + } + + tmpDir := t.TempDir() + pkgToDiff := map[*apk.Package][]byte{} + + ctx := context.Background() + layers, err := splitLayers(ctx, fsys, groups, pkgToDiff, tmpDir) + if err != nil { + t.Fatalf("splitLayers failed: %v", err) + } + + // 2 ecosystem groups + 1 top layer = 3 layers + if len(layers) != 3 { + t.Fatalf("expected 3 layers, got %d", len(layers)) + } + + // Check that flask files ended up in layer 0 and requests in layer 1. 
+ for i, want := range []string{"flask", "requests"} { + rc, err := layers[i].Uncompressed() + if err != nil { + t.Fatal(err) + } + tr := tar.NewReader(rc) + found := false + for { + hdr, err := tr.Next() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + if hdr.Name == fmt.Sprintf("usr/lib/python3.12/site-packages/%s/__init__.py", want) { + found = true + } + } + rc.Close() + if !found { + t.Errorf("layer %d missing %s/__init__.py", i, want) + } + } +} diff --git a/pkg/ecosystem/ecosystem.go b/pkg/ecosystem/ecosystem.go index 0c9de3fb9..443451307 100644 --- a/pkg/ecosystem/ecosystem.go +++ b/pkg/ecosystem/ecosystem.go @@ -34,6 +34,17 @@ type ResolvedPackage struct { Checksum string // "sha256:" SignatureURL string // optional: signature bundle URL (from data-signature) ProvenanceURL string // optional: provenance data URL (from data-provenance) + + // InstalledSize is populated after installation with the approximate + // bytes written for this package. Used for layering budget decisions. + InstalledSize uint64 +} + +// OwnerName returns the namespaced owner string used for filesystem tagging +// and layer routing (e.g. "python:flask"). The colon ensures no collision +// with APK package names. +func (rp ResolvedPackage) OwnerName() string { + return rp.Ecosystem + ":" + rp.Name } // Installer is the interface that ecosystem package installers must implement. @@ -96,29 +107,46 @@ func Get(name string) (Installer, bool) { return factory(), true } +// OwnerTagger is implemented by filesystems that support tagging files +// with an owner name for layering purposes. +type OwnerTagger interface { + SetCurrentOwner(owner string) + OwnerSize(owner string) uint64 +} + // InstallAll installs packages for all configured ecosystems. -// Returns environment variables that should be set in the image configuration. 
-func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture, a auth.Authenticator) (map[string]string, error) { +// Returns environment variables, the resolved packages with InstalledSize +// populated, and any error. +// +// Installers are responsible for tagging files with per-package ownership +// via the OwnerTagger interface on the filesystem, if supported. +func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]types.EcosystemConfig, arch types.Architecture, a auth.Authenticator) (map[string]string, []ResolvedPackage, error) { env := map[string]string{} + var installed []ResolvedPackage + for name, config := range ecosystems { installer, ok := Get(name) if !ok { - return nil, fmt.Errorf("unknown ecosystem: %s", name) + return nil, nil, fmt.Errorf("unknown ecosystem: %s", name) } resolved, err := installer.Resolve(ctx, config, arch, a) if err != nil { - return nil, fmt.Errorf("resolving %s packages: %w", name, err) + return nil, nil, fmt.Errorf("resolving %s packages: %w", name, err) } if len(resolved) == 0 { continue } + vars, err := installer.Install(ctx, fs, resolved, config, a) if err != nil { - return nil, fmt.Errorf("installing %s packages: %w", name, err) + return nil, nil, fmt.Errorf("installing %s packages: %w", name, err) } + + installed = append(installed, resolved...) 
+ for k, v := range vars { env[k] = v } } - return env, nil + return env, installed, nil } diff --git a/pkg/ecosystem/python/python.go b/pkg/ecosystem/python/python.go index 8f27b76bb..31e36ba98 100644 --- a/pkg/ecosystem/python/python.go +++ b/pkg/ecosystem/python/python.go @@ -103,9 +103,15 @@ func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []e return nil, fmt.Errorf("creating site-packages directory: %w", err) } - for _, pkg := range packages { + tagger, _ := fsys.(ecosystem.OwnerTagger) + + for idx, pkg := range packages { log.Infof("installing python package %s==%s", pkg.Name, pkg.Version) + if tagger != nil { + tagger.SetCurrentOwner(pkg.OwnerName()) + } + data, err := downloadWheel(ctx, pkg.URL, a) if err != nil { return nil, fmt.Errorf("downloading %s: %w", pkg.Name, err) @@ -128,6 +134,11 @@ func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []e log.Debugf("could not write SBOM for %s: %v", pkg.Name, err) } } + + if tagger != nil { + tagger.SetCurrentOwner("") + packages[idx].InstalledSize = tagger.OwnerSize(pkg.OwnerName()) + } } // When using a venv, set VIRTUAL_ENV and prepend its bin/ to PATH. diff --git a/pkg/tarfs/fs.go b/pkg/tarfs/fs.go index c37756b54..cb14b6ca9 100644 --- a/pkg/tarfs/fs.go +++ b/pkg/tarfs/fs.go @@ -59,6 +59,29 @@ type tarEntry struct { type memFS struct { tree *node + + // currentOwner is the owner name to stamp on new nodes. + // Set via SetCurrentOwner during ecosystem package installation. + currentOwner string + + // ownerSizes tracks the cumulative bytes written per owner, + // used to estimate installed size for layering budget decisions. + ownerSizes map[string]uint64 +} + +// SetCurrentOwner sets the owner name for any new filesystem nodes +// created via MkdirAll, WriteFile, Symlink, etc. Pass "" to clear. +// This is used by ecosystem package installers to tag files for layering. 
+func (m *memFS) SetCurrentOwner(owner string) { + m.currentOwner = owner +} + +// OwnerSize returns the total bytes written for the given owner. +func (m *memFS) OwnerSize(owner string) uint64 { + if m.ownerSizes == nil { + return 0 + } + return m.ownerSizes[owner] } func New() *memFS { @@ -267,6 +290,7 @@ func (m *memFS) Mkdir(path string, perms fs.FileMode) error { children: map[string]*node{}, xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, + owner: m.currentOwner, } return nil } @@ -316,6 +340,7 @@ func (m *memFS) MkdirAll(path string, perm fs.FileMode) error { children: map[string]*node{}, xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, + owner: m.currentOwner, } anode.children[part] = newnode } @@ -386,6 +411,7 @@ func (m *memFS) openFile(name string, flag int, perm fs.FileMode, linkCount int) dir: false, xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, + owner: m.currentOwner, } parentAnode.children[base] = anode } @@ -599,6 +625,7 @@ func (m *memFS) Mknod(path string, mode uint32, dev int) error { xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, modTime: anode.modTime, + owner: m.currentOwner, } return nil @@ -677,6 +704,7 @@ func (m *memFS) Symlink(oldname, newname string) error { xattrs: map[string][]byte{}, hardlinks: map[string]*tar.Header{}, modTime: anode.modTime, + owner: m.currentOwner, } return nil } @@ -949,6 +977,15 @@ func (f *memFile) Write(p []byte) (n int, err error) { copy(f.node.data[f.offset:], p) } f.offset += int64(len(p)) + + // Track installed size per owner for ecosystem package layering. + if f.node.owner != "" && f.fs != nil { + if f.fs.ownerSizes == nil { + f.fs.ownerSizes = map[string]uint64{} + } + f.fs.ownerSizes[f.node.owner] += uint64(len(p)) + } + return len(p), nil } @@ -970,6 +1007,10 @@ type node struct { // This stores metadata for a tarfs-backed file. 
te *tarEntry + + // owner is set for files created by ecosystem package installers + // to track ownership for layering purposes. + owner string } func (n *node) fileInfo(parent, name string) fs.FileInfo { @@ -1035,3 +1076,13 @@ func (m *memFileInfo) Package() *apk.Package { return m.te.pkg } + +// Owner returns the name of the owner of this file. +// For APK-installed files, this is the package name. +// For ecosystem-installed files, this is the owner string set during install. +func (m *memFileInfo) Owner() string { + if m.te != nil && m.te.pkg != nil { + return m.te.pkg.Name + } + return m.owner +} From 9a56bdd10caeb6f86d21af8e85e587bfea09445a Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Mon, 6 Apr 2026 15:22:58 -0600 Subject: [PATCH 08/12] fix: address lint issues in ecosystem layering - Fix appendAssign: use apkGroups directly instead of allGroups - Replace map loop with maps.Copy - Fix import ordering Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/build/layers.go | 4 ++-- pkg/ecosystem/ecosystem.go | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pkg/build/layers.go b/pkg/build/layers.go index 321b80479..11c0fca4d 100644 --- a/pkg/build/layers.go +++ b/pkg/build/layers.go @@ -90,8 +90,8 @@ func (bc *Context) buildLayers(ctx context.Context) ([]v1.Layer, error) { } // Combine all groups and apply the shared budget. - allGroups := append(apkGroups, ecoGroups...) - groups := applyBudget(allGroups, bc.ic.Layering.Budget) + apkGroups = append(apkGroups, ecoGroups...) 
+ groups := applyBudget(apkGroups, bc.ic.Layering.Budget) log.Infof("Building %d layers with budget %d", len(groups), bc.ic.Layering.Budget) for i, g := range groups { diff --git a/pkg/ecosystem/ecosystem.go b/pkg/ecosystem/ecosystem.go index 443451307..1a6f12a03 100644 --- a/pkg/ecosystem/ecosystem.go +++ b/pkg/ecosystem/ecosystem.go @@ -17,10 +17,11 @@ package ecosystem import ( "context" "fmt" + "maps" "sync" - apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/apk/auth" + apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/build/types" ) @@ -144,9 +145,7 @@ func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]type installed = append(installed, resolved...) - for k, v := range vars { - env[k] = v - } + maps.Copy(env, vars) } return env, installed, nil } From 6709896a6f297cbdc117a9ad089b9b0e865a9c54 Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Mon, 6 Apr 2026 15:26:52 -0600 Subject: [PATCH 09/12] fix: wire auth into ecosystem package resolution and installation Both ecosystem callers were passing nil for the authenticator, meaning private Python indexes requiring authentication would fail. Use bc.o.Auth (from options) in the build path and auth.DefaultAuthenticators in the lock path, matching how APK repository auth is handled. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/cli/lock.go | 2 +- pkg/build/build_implementation.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/cli/lock.go b/internal/cli/lock.go index dcf3f845a..92f673727 100644 --- a/internal/cli/lock.go +++ b/internal/cli/lock.go @@ -254,7 +254,7 @@ func LockCmd(ctx context.Context, output string, archs []types.Architecture, opt return fmt.Errorf("unknown ecosystem: %s", name) } for _, arch := range archs { - resolved, err := installer.Resolve(ctx, ecoConfig, arch, nil) + resolved, err := installer.Resolve(ctx, ecoConfig, arch, auth.DefaultAuthenticators) if err != nil { return fmt.Errorf("resolving %s packages for %s: %w", name, arch, err) } diff --git a/pkg/build/build_implementation.go b/pkg/build/build_implementation.go index e74928bfe..df63c6cea 100644 --- a/pkg/build/build_implementation.go +++ b/pkg/build/build_implementation.go @@ -182,7 +182,7 @@ func (bc *Context) buildImage(ctx context.Context) ([]apk.InstalledDiff, error) // Install ecosystem packages (python, etc.) after APK packages so that // the language runtime is available for version detection. 
if len(bc.ic.Contents.Ecosystems) > 0 { - env, ecoPkgs, err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch, nil) + env, ecoPkgs, err := ecosystem.InstallAll(ctx, bc.fs, bc.ic.Contents.Ecosystems, bc.o.Arch, bc.o.Auth) if err != nil { return nil, fmt.Errorf("installing ecosystem packages: %w", err) } From 61c708bd87fc329ab4f9dee24f35c07362ae40d2 Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Mon, 6 Apr 2026 16:24:53 -0600 Subject: [PATCH 10/12] fix: Address all lint failures Signed-off-by: RJ Sampson --- pkg/build/build_implementation.go | 5 ++-- pkg/build/types/schema.json | 36 +++++++++++++++++++++++++++ pkg/ecosystem/python/platform.go | 27 ++++++++++---------- pkg/ecosystem/python/platform_test.go | 16 ++++++------ pkg/ecosystem/python/python.go | 8 +++--- pkg/ecosystem/python/python_test.go | 1 - pkg/ecosystem/python/resolve.go | 26 +++++++++---------- pkg/ecosystem/python/resolve_test.go | 10 +++++--- pkg/ecosystem/python/sbom.go | 14 +++++------ pkg/ecosystem/python/wheel.go | 8 +++++- pkg/ecosystem/python/wheel_test.go | 8 +++--- pkg/lock/lock.go | 8 +++--- 12 files changed, 103 insertions(+), 64 deletions(-) diff --git a/pkg/build/build_implementation.go b/pkg/build/build_implementation.go index df63c6cea..fb67df191 100644 --- a/pkg/build/build_implementation.go +++ b/pkg/build/build_implementation.go @@ -23,6 +23,7 @@ import ( "encoding/json" "fmt" "io" + "maps" "os" "path/filepath" "runtime" @@ -191,9 +192,7 @@ func (bc *Context) buildImage(ctx context.Context) ([]apk.InstalledDiff, error) if bc.ic.Environment == nil { bc.ic.Environment = make(map[string]string) } - for k, v := range env { - bc.ic.Environment[k] = v - } + maps.Copy(bc.ic.Environment, env) } } diff --git a/pkg/build/types/schema.json b/pkg/build/types/schema.json index 071b0c7ce..cdbede06d 100644 --- a/pkg/build/types/schema.json +++ b/pkg/build/types/schema.json @@ -31,6 +31,35 @@ "additionalProperties": false, "type": "object" }, + "EcosystemConfig": { + 
"properties": { + "indexes": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Indexes is a list of package index URLs (e.g., PyPI simple API URLs)." + }, + "packages": { + "items": { + "type": "string" + }, + "type": "array", + "description": "Packages is a list of package specifications (e.g., \"flask==3.0.0\")." + }, + "python_version": { + "type": "string", + "description": "PythonVersion overrides auto-detection of the Python version (e.g., \"3.12\")." + }, + "venv": { + "type": "string", + "description": "Venv is an optional path for a virtual environment (e.g., \"/app/venv\").\nWhen set, packages are installed into the venv instead of the system site-packages,\nand VIRTUAL_ENV / PATH are set automatically." + } + }, + "additionalProperties": false, + "type": "object", + "description": "EcosystemConfig holds configuration for a non-APK package ecosystem (e.g., python)." + }, "Group": { "properties": { "groupname": { @@ -217,6 +246,13 @@ "baseimage": { "$ref": "#/$defs/BaseImageDescriptor", "description": "Optional: Base image to build on top of. Warning: Experimental." + }, + "ecosystems": { + "additionalProperties": { + "$ref": "#/$defs/EcosystemConfig" + }, + "type": "object", + "description": "Optional: Non-APK ecosystem packages to install (e.g., pip packages)." 
} }, "additionalProperties": false, diff --git a/pkg/ecosystem/python/platform.go b/pkg/ecosystem/python/platform.go index c1e9dd06b..c9c4ccfcd 100644 --- a/pkg/ecosystem/python/platform.go +++ b/pkg/ecosystem/python/platform.go @@ -16,6 +16,7 @@ package python import ( "fmt" + "slices" "strings" "chainguard.dev/apko/pkg/build/types" @@ -143,7 +144,7 @@ func isCompatibleWheel(w wheelFileParts, pythonVersion string, arch types.Archit // E.g., "py3", "cp312", "py2.py3" func isCompatiblePythonTag(tag, pythonVersion string) bool { cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") - for _, t := range strings.Split(tag, ".") { + for t := range strings.SplitSeq(tag, ".") { if t == "py3" || t == "py2.py3" || t == cpTag { return true } @@ -157,7 +158,7 @@ func isCompatibleABI(tag, pythonVersion string) bool { return true } cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") - for _, t := range strings.Split(tag, ".") { + for t := range strings.SplitSeq(tag, ".") { if t == "abi3" || t == cpTag { return true } @@ -171,11 +172,9 @@ func isCompatiblePlatform(tag string, arch types.Architecture) bool { return true } compatible := platformTags(arch) - for _, t := range strings.Split(tag, ".") { - for _, c := range compatible { - if t == c { - return true - } + for t := range strings.SplitSeq(tag, ".") { + if slices.Contains(compatible, t) { + return true } } return false @@ -188,7 +187,7 @@ func wheelScore(w wheelFileParts, pythonVersion string, arch types.Architecture) // Prefer exact CPython tag over generic py3 cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") - for _, t := range strings.Split(w.PythonTag, ".") { + for t := range strings.SplitSeq(w.PythonTag, ".") { if t == cpTag { score += 100 break @@ -196,10 +195,11 @@ func wheelScore(w wheelFileParts, pythonVersion string, arch types.Architecture) } // Prefer specific ABI over none/abi3 - for _, t := range strings.Split(w.ABITag, ".") { - if t == cpTag { + for t := range strings.SplitSeq(w.ABITag, 
".") { + switch t { + case cpTag: score += 50 - } else if t == "abi3" { + case "abi3": score += 25 } } @@ -208,11 +208,10 @@ func wheelScore(w wheelFileParts, pythonVersion string, arch types.Architecture) if w.PlatformTag != "any" { platTags := platformTags(arch) for i, pt := range platTags { - for _, t := range strings.Split(w.PlatformTag, ".") { - if t == pt { + for pp := range strings.SplitSeq(w.PlatformTag, ".") { + if pp == pt { // More specific platforms (earlier in list) get higher scores score += 10 * (len(platTags) - i) - break } } } diff --git a/pkg/ecosystem/python/platform_test.go b/pkg/ecosystem/python/platform_test.go index 7edd62ed9..275873a88 100644 --- a/pkg/ecosystem/python/platform_test.go +++ b/pkg/ecosystem/python/platform_test.go @@ -22,9 +22,9 @@ import ( func TestPlatformTags(t *testing.T) { tests := []struct { - arch string - wantLen int - wantAny string // At least one tag should contain this + arch string + wantLen int + wantAny string // At least one tag should contain this }{ {"amd64", 5, "x86_64"}, {"arm64", 3, "aarch64"}, @@ -136,11 +136,11 @@ func TestParseWheelFilename(t *testing.T) { func TestIsCompatibleWheel(t *testing.T) { tests := []struct { - name string - wheel wheelFileParts - pyVer string - arch string - want bool + name string + wheel wheelFileParts + pyVer string + arch string + want bool }{ { name: "pure python wheel is always compatible", diff --git a/pkg/ecosystem/python/python.go b/pkg/ecosystem/python/python.go index 31e36ba98..6c6a197bf 100644 --- a/pkg/ecosystem/python/python.go +++ b/pkg/ecosystem/python/python.go @@ -24,8 +24,8 @@ import ( "github.com/chainguard-dev/clog" - apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/apk/auth" + apkfs "chainguard.dev/apko/pkg/apk/fs" "chainguard.dev/apko/pkg/build/types" "chainguard.dev/apko/pkg/ecosystem" ) @@ -181,9 +181,9 @@ func createVenv(fsys apkfs.FullFS, venvPath, pythonVersion string) error { pythonBin := "/usr/bin/python" + pythonVersion binPath := 
filepath.Join(venvPath, "bin") symlinks := map[string]string{ - "python": pythonBin, - "python3": pythonBin, - "python" + pythonVersion: pythonBin, + "python": pythonBin, + "python3": pythonBin, + "python" + pythonVersion: pythonBin, } for name, target := range symlinks { linkPath := filepath.Join(binPath, name) diff --git a/pkg/ecosystem/python/python_test.go b/pkg/ecosystem/python/python_test.go index dd85cb305..9703819f9 100644 --- a/pkg/ecosystem/python/python_test.go +++ b/pkg/ecosystem/python/python_test.go @@ -69,7 +69,6 @@ func TestCreateVenv(t *testing.T) { } } - func TestInstallerRegistration(t *testing.T) { inst, ok := ecosystem.Get("python") if !ok { diff --git a/pkg/ecosystem/python/resolve.go b/pkg/ecosystem/python/resolve.go index 52cabf1b3..0b2f72fd3 100644 --- a/pkg/ecosystem/python/resolve.go +++ b/pkg/ecosystem/python/resolve.go @@ -131,14 +131,14 @@ func normalizeName(name string) string { // pypiPackageJSON is the response from https://pypi.org/pypi/{name}/{version}/json type pypiPackageJSON struct { - Info pypiInfo `json:"info"` - URLs []pypiURL `json:"urls"` + Info pypiInfo `json:"info"` + URLs []pypiURL `json:"urls"` } type pypiInfo struct { - Name string `json:"name"` - Version string `json:"version"` - RequiresDist []string `json:"requires_dist"` + Name string `json:"name"` + Version string `json:"version"` + RequiresDist []string `json:"requires_dist"` } type pypiURL struct { @@ -300,7 +300,7 @@ func resolveJSONVersion(ctx context.Context, normalizedName, originalName, versi } // Parse dependencies from requires_dist - var deps []packageSpec + deps := make([]packageSpec, 0, len(pkgResp.Info.RequiresDist)) for _, req := range pkgResp.Info.RequiresDist { dep := parsePackageSpec(req) if dep.Markers != "" && !evaluateMarkers(dep.Markers, nil) { @@ -381,8 +381,6 @@ type wheelLink struct { // parseSimpleIndex parses the HTML from a PEP 503 Simple Repository API response. 
func parseSimpleIndex(body string, baseURL string) []wheelLink { - var links []wheelLink - // Use a regex that handles '>' inside quoted attribute values (e.g., data-requires-python=">=3.0"). // The [^>]* approach breaks when attributes contain '>' characters. linkRe := regexp.MustCompile(`"]*(?:"[^"]*")?)*href="([^"]*)"(?:[^>"]*(?:"[^"]*")?)*>([^<]*)`) @@ -390,7 +388,9 @@ func parseSimpleIndex(body string, baseURL string) []wheelLink { provenanceRe := regexp.MustCompile(`data-provenance="([^"]*)"`) signatureRe := regexp.MustCompile(`data-signature="([^"]*)"`) - for _, match := range linkRe.FindAllStringSubmatch(body, -1) { + matches := linkRe.FindAllStringSubmatch(body, -1) + links := make([]wheelLink, 0, len(matches)) + for _, match := range matches { href := match[1] filename := strings.TrimSpace(match[2]) @@ -538,8 +538,8 @@ func extractDepsFromWheel(ctx context.Context, url string, a auth.Authenticator) // parseRequiresDist extracts Requires-Dist entries from wheel METADATA content. 
func parseRequiresDist(metadata string) []packageSpec { - var deps []packageSpec - for _, line := range strings.Split(metadata, "\n") { + deps := make([]packageSpec, 0, strings.Count(metadata, "Requires-Dist: ")) + for line := range strings.SplitSeq(metadata, "\n") { line = strings.TrimRight(line, "\r") if !strings.HasPrefix(line, "Requires-Dist: ") { continue @@ -644,9 +644,7 @@ func compareVersions(a, b string) int { bParts := strings.Split(b, ".") maxLen := len(aParts) - if len(bParts) > maxLen { - maxLen = len(bParts) - } + maxLen = max(maxLen, len(bParts)) for i := 0; i < maxLen; i++ { var aVal, bVal string diff --git a/pkg/ecosystem/python/resolve_test.go b/pkg/ecosystem/python/resolve_test.go index 711a5bd6b..fc706d7c0 100644 --- a/pkg/ecosystem/python/resolve_test.go +++ b/pkg/ecosystem/python/resolve_test.go @@ -19,6 +19,7 @@ import ( "encoding/json" "net/http" "net/http/httptest" + "strings" "testing" "chainguard.dev/apko/pkg/build/types" @@ -265,12 +266,13 @@ func servePyPIJSON(t *testing.T, packages map[string]pypiPackageJSON) *httptest. // Serve Simple API as fallback mux.HandleFunc("/simple/"+name+"/", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html") - html := "\n" + var b strings.Builder + b.WriteString("\n") for _, u := range pkg.URLs { - html += `` + u.Filename + "\n" + b.WriteString(`` + u.Filename + "\n") } - html += "" - w.Write([]byte(html)) + b.WriteString("") + w.Write([]byte(b.String())) }) } return httptest.NewServer(mux) diff --git a/pkg/ecosystem/python/sbom.go b/pkg/ecosystem/python/sbom.go index a7b8cd928..d00ec1c0b 100644 --- a/pkg/ecosystem/python/sbom.go +++ b/pkg/ecosystem/python/sbom.go @@ -68,13 +68,13 @@ func writePackageSBOM(fsys apkfs.FullFS, sitePackagesPath string, wheelData []by // spdxDocument is a minimal SPDX 2.3 JSON document structure. 
type spdxDocument struct { - SPDXVersion string `json:"spdxVersion"` - DataLicense string `json:"dataLicense"` - SPDXID string `json:"SPDXID"` - Name string `json:"name"` - Namespace string `json:"documentNamespace"` - CreationInfo spdxCreationInfo `json:"creationInfo"` - Packages []spdxPackage `json:"packages"` + SPDXVersion string `json:"spdxVersion"` + DataLicense string `json:"dataLicense"` + SPDXID string `json:"SPDXID"` + Name string `json:"name"` + Namespace string `json:"documentNamespace"` + CreationInfo spdxCreationInfo `json:"creationInfo"` + Packages []spdxPackage `json:"packages"` } type spdxCreationInfo struct { diff --git a/pkg/ecosystem/python/wheel.go b/pkg/ecosystem/python/wheel.go index 6ade0eeb5..50c6ff2a0 100644 --- a/pkg/ecosystem/python/wheel.go +++ b/pkg/ecosystem/python/wheel.go @@ -19,6 +19,7 @@ import ( "bytes" "crypto/sha256" "encoding/hex" + "errors" "fmt" "io" "path/filepath" @@ -35,8 +36,13 @@ func extractWheel(fsys apkfs.FullFS, wheelData []byte, sitePackagesPath string) return fmt.Errorf("opening wheel as zip: %w", err) } + cleanBase := filepath.Clean(sitePackagesPath) + string(filepath.Separator) for _, f := range reader.File { - targetPath := filepath.Join(sitePackagesPath, f.Name) + // G305: Protect against zip slip / path traversal. 
+ targetPath := filepath.Join(sitePackagesPath, filepath.Clean(f.Name)) + if !strings.HasPrefix(targetPath, cleanBase) { + return errors.New("illegal file path in wheel archive: " + f.Name) + } if f.FileInfo().IsDir() { if err := fsys.MkdirAll(targetPath, 0755); err != nil { diff --git a/pkg/ecosystem/python/wheel_test.go b/pkg/ecosystem/python/wheel_test.go index 4d87c8a4a..5323da44a 100644 --- a/pkg/ecosystem/python/wheel_test.go +++ b/pkg/ecosystem/python/wheel_test.go @@ -45,8 +45,8 @@ func createTestWheel(t *testing.T, files map[string]string) []byte { func TestExtractWheel(t *testing.T) { wheelData := createTestWheel(t, map[string]string{ - "mypackage/__init__.py": "# init", - "mypackage/module.py": "def hello(): pass", + "mypackage/__init__.py": "# init", + "mypackage/module.py": "def hello(): pass", "mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n", "mypackage-1.0.0.dist-info/RECORD": "", }) @@ -81,8 +81,8 @@ func TestExtractWheel(t *testing.T) { func TestWriteInstallerFile(t *testing.T) { wheelData := createTestWheel(t, map[string]string{ - "mypackage/__init__.py": "# init", - "mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n", + "mypackage/__init__.py": "# init", + "mypackage-1.0.0.dist-info/METADATA": "Name: mypackage\nVersion: 1.0.0\n", }) fs := apkfs.NewMemFS() diff --git a/pkg/lock/lock.go b/pkg/lock/lock.go index 870661f8c..48f449b88 100644 --- a/pkg/lock/lock.go +++ b/pkg/lock/lock.go @@ -24,10 +24,10 @@ type Config struct { } type LockContents struct { - Keyrings []LockKeyring `json:"keyring"` - BuildRepositories []LockRepo `json:"build_repositories"` - RuntimeOnlyRepositories []LockRepo `json:"runtime_repositories"` - Repositories []LockRepo `json:"repositories"` + Keyrings []LockKeyring `json:"keyring"` + BuildRepositories []LockRepo `json:"build_repositories"` + RuntimeOnlyRepositories []LockRepo `json:"runtime_repositories"` + Repositories []LockRepo `json:"repositories"` // Packages in order of 
installation -> for a single architecture. Packages []LockPkg `json:"packages"` EcosystemPackages []LockEcosystemPkg `json:"ecosystem_packages,omitempty"` From cbd33027e3ec3062fa10b791410a7486187a5133 Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Mon, 6 Apr 2026 16:31:52 -0600 Subject: [PATCH 11/12] fix: eliminate outbound network calls in resolver tests Test data had wheel URLs pointing to files.example.com, causing DNS lookups that Harden-Runner blocks. Rewrite all test URLs to point back to the httptest server, which also serves dummy wheel responses for dependency extraction. Co-Authored-By: Claude Opus 4.6 (1M context) --- pkg/ecosystem/python/resolve_test.go | 57 ++++++++++++++++++++++------ 1 file changed, 46 insertions(+), 11 deletions(-) diff --git a/pkg/ecosystem/python/resolve_test.go b/pkg/ecosystem/python/resolve_test.go index fc706d7c0..4fa17dff1 100644 --- a/pkg/ecosystem/python/resolve_test.go +++ b/pkg/ecosystem/python/resolve_test.go @@ -241,23 +241,39 @@ func TestIsPreRelease(t *testing.T) { } // servePyPIJSON creates a mock server that serves PyPI JSON API responses. +// Wheel URLs in test data use a placeholder that gets replaced with the +// actual test server URL to avoid outbound network calls. func servePyPIJSON(t *testing.T, packages map[string]pypiPackageJSON) *httptest.Server { t.Helper() mux := http.NewServeMux() + + // Placeholder replaced with actual server URL after startup. + var serverURL string + for name, pkg := range packages { name := normalizeName(name) pkg := pkg // Serve /pypi/{name}/{version}/json mux.HandleFunc("/pypi/"+name+"/"+pkg.Info.Version+"/json", func(w http.ResponseWriter, r *http.Request) { - json.NewEncoder(w).Encode(pkg) + // Rewrite placeholder URLs to point to this test server. 
+ resp := pkg + for i := range resp.URLs { + resp.URLs[i].URL = serverURL + "/wheels/" + resp.URLs[i].Filename + } + json.NewEncoder(w).Encode(resp) }) // Serve /pypi/{name}/json (versions listing) mux.HandleFunc("/pypi/"+name+"/json", func(w http.ResponseWriter, r *http.Request) { + urls := make([]pypiURL, len(pkg.URLs)) + copy(urls, pkg.URLs) + for i := range urls { + urls[i].URL = serverURL + "/wheels/" + urls[i].Filename + } resp := pypiVersionsJSON{ Releases: map[string][]pypiURL{ - pkg.Info.Version: pkg.URLs, + pkg.Info.Version: urls, }, } json.NewEncoder(w).Encode(resp) @@ -269,13 +285,21 @@ func servePyPIJSON(t *testing.T, packages map[string]pypiPackageJSON) *httptest. var b strings.Builder b.WriteString("\n") for _, u := range pkg.URLs { - b.WriteString(`` + u.Filename + "\n") + b.WriteString(`` + u.Filename + "\n") } b.WriteString("") w.Write([]byte(b.String())) }) } - return httptest.NewServer(mux) + + // Serve dummy wheel downloads (resolver fetches these to extract deps). + mux.HandleFunc("/wheels/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + + server := httptest.NewServer(mux) + serverURL = server.URL + return server } func TestResolveWithMockJSON(t *testing.T) { @@ -287,7 +311,7 @@ func TestResolveWithMockJSON(t *testing.T) { }, URLs: []pypiURL{{ Filename: "Flask-3.0.0-py3-none-any.whl", - URL: "https://files.example.com/Flask-3.0.0-py3-none-any.whl", + URL: "https://placeholder/Flask-3.0.0-py3-none-any.whl", PackageType: "bdist_wheel", Digests: pypiDigests{SHA256: "abc123"}, }}, @@ -334,7 +358,7 @@ func TestResolveTransitiveDeps(t *testing.T) { }, URLs: []pypiURL{{ Filename: "Flask-3.0.0-py3-none-any.whl", - URL: "https://files.example.com/Flask-3.0.0-py3-none-any.whl", + URL: "https://placeholder/Flask-3.0.0-py3-none-any.whl", PackageType: "bdist_wheel", Digests: pypiDigests{SHA256: "aaa"}, }}, @@ -349,7 +373,7 @@ func TestResolveTransitiveDeps(t *testing.T) { }, URLs: []pypiURL{{ Filename: 
"Werkzeug-3.0.1-py3-none-any.whl", - URL: "https://files.example.com/Werkzeug-3.0.1-py3-none-any.whl", + URL: "https://placeholder/Werkzeug-3.0.1-py3-none-any.whl", PackageType: "bdist_wheel", Digests: pypiDigests{SHA256: "bbb"}, }}, @@ -361,7 +385,7 @@ func TestResolveTransitiveDeps(t *testing.T) { }, URLs: []pypiURL{{ Filename: "click-8.1.7-py3-none-any.whl", - URL: "https://files.example.com/click-8.1.7-py3-none-any.whl", + URL: "https://placeholder/click-8.1.7-py3-none-any.whl", PackageType: "bdist_wheel", Digests: pypiDigests{SHA256: "ccc"}, }}, @@ -373,7 +397,7 @@ func TestResolveTransitiveDeps(t *testing.T) { }, URLs: []pypiURL{{ Filename: "MarkupSafe-2.1.5-py3-none-any.whl", - URL: "https://files.example.com/MarkupSafe-2.1.5-py3-none-any.whl", + URL: "https://placeholder/MarkupSafe-2.1.5-py3-none-any.whl", PackageType: "bdist_wheel", Digests: pypiDigests{SHA256: "ddd"}, }}, @@ -409,16 +433,27 @@ func TestResolveTransitiveDeps(t *testing.T) { } func TestResolveSimpleApiFallback(t *testing.T) { - // Test that non-PyPI indexes use the Simple API + // Test that non-PyPI indexes use the Simple API. + // All URLs must point to the test server to avoid outbound network calls. mux := http.NewServeMux() + + // Serve the simple index page; the wheel URL is set dynamically after the server starts. + var serverURL string mux.HandleFunc("/simple/mypackage/", func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/html") w.Write([]byte(` -mypackage-1.0.0-py3-none-any.whl +mypackage-1.0.0-py3-none-any.whl `)) }) + + // Serve a dummy wheel (the resolver downloads it to extract deps). 
+ mux.HandleFunc("/wheels/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusOK) + }) + server := httptest.NewServer(mux) defer server.Close() + serverURL = server.URL specs := []packageSpec{{Name: "mypackage", Operator: "==", Version: "1.0.0"}} // Use a non-pypi index so it doesn't try the JSON API From 93e029bd475ca33a6da136fa5d19af6b6d6cfb73 Mon Sep 17 00:00:00 2001 From: RJ Sampson Date: Mon, 6 Apr 2026 16:39:37 -0600 Subject: [PATCH 12/12] feat: detect platform compatibility from wheel tags and image libc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stop hardcoding platform tag lists per architecture. Instead, parse wheel platform tags dynamically by checking the machine suffix and prefix (musllinux_, manylinux, linux_). Detect the image libc from /etc/os-release (ID=alpine → musl, otherwise glibc) and only accept wheels matching the correct libc. Replace the scoring system with simple binary-over-pure-python preference. 
Co-Authored-By: Claude Opus 4.6 (1M context) --- internal/cli/lock.go | 2 +- pkg/ecosystem/ecosystem.go | 23 +++- pkg/ecosystem/python/platform.go | 152 +++++++++----------------- pkg/ecosystem/python/platform_test.go | 144 ++++++++++++++---------- pkg/ecosystem/python/python.go | 4 +- pkg/ecosystem/python/python_test.go | 5 +- pkg/ecosystem/python/resolve.go | 43 +++----- pkg/ecosystem/python/resolve_test.go | 6 +- 8 files changed, 183 insertions(+), 196 deletions(-) diff --git a/internal/cli/lock.go b/internal/cli/lock.go index 92f673727..e09a7712d 100644 --- a/internal/cli/lock.go +++ b/internal/cli/lock.go @@ -254,7 +254,7 @@ func LockCmd(ctx context.Context, output string, archs []types.Architecture, opt return fmt.Errorf("unknown ecosystem: %s", name) } for _, arch := range archs { - resolved, err := installer.Resolve(ctx, ecoConfig, arch, auth.DefaultAuthenticators) + resolved, err := installer.Resolve(ctx, ecoConfig, arch, "glibc", auth.DefaultAuthenticators) if err != nil { return fmt.Errorf("resolving %s packages for %s: %w", name, arch, err) } diff --git a/pkg/ecosystem/ecosystem.go b/pkg/ecosystem/ecosystem.go index 1a6f12a03..f16c772fa 100644 --- a/pkg/ecosystem/ecosystem.go +++ b/pkg/ecosystem/ecosystem.go @@ -18,6 +18,7 @@ import ( "context" "fmt" "maps" + "strings" "sync" "chainguard.dev/apko/pkg/apk/auth" @@ -53,7 +54,8 @@ type Installer interface { // Name returns the ecosystem name (e.g., "python"). Name() string // Resolve resolves the requested packages to specific versions and URLs. - Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture, a auth.Authenticator) ([]ResolvedPackage, error) + // libc is "musl" or "glibc", detected from the image filesystem. + Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture, libc string, a auth.Authenticator) ([]ResolvedPackage, error) // Install extracts resolved packages into the filesystem. 
// Returns environment variables that should be set in the image configuration. Install(ctx context.Context, fs apkfs.FullFS, packages []ResolvedPackage, config types.EcosystemConfig, a auth.Authenticator) (map[string]string, error) @@ -108,6 +110,21 @@ func Get(name string) (Installer, bool) { return factory(), true } +// detectLibc checks /etc/os-release to determine the image's libc. +// Alpine uses musl; everything else uses glibc. +func detectLibc(fs apkfs.FullFS) string { + data, err := fs.ReadFile("etc/os-release") + if err != nil { + return "glibc" + } + for line := range strings.SplitSeq(string(data), "\n") { + if line == "ID=alpine" { + return "musl" + } + } + return "glibc" +} + // OwnerTagger is implemented by filesystems that support tagging files // with an owner name for layering purposes. type OwnerTagger interface { @@ -125,12 +142,14 @@ func InstallAll(ctx context.Context, fs apkfs.FullFS, ecosystems map[string]type env := map[string]string{} var installed []ResolvedPackage + libc := detectLibc(fs) + for name, config := range ecosystems { installer, ok := Get(name) if !ok { return nil, nil, fmt.Errorf("unknown ecosystem: %s", name) } - resolved, err := installer.Resolve(ctx, config, arch, a) + resolved, err := installer.Resolve(ctx, config, arch, libc, a) if err != nil { return nil, nil, fmt.Errorf("resolving %s packages: %w", name, err) } diff --git a/pkg/ecosystem/python/platform.go b/pkg/ecosystem/python/platform.go index c9c4ccfcd..d423c7014 100644 --- a/pkg/ecosystem/python/platform.go +++ b/pkg/ecosystem/python/platform.go @@ -16,69 +16,46 @@ package python import ( "fmt" - "slices" "strings" "chainguard.dev/apko/pkg/build/types" ) -// platformTags returns the list of compatible wheel platform tags for the -// given architecture, ordered from most specific to least specific. 
-func platformTags(arch types.Architecture) []string { - switch arch { - case types.ParseArchitecture("amd64"): - return []string{ - "manylinux_2_17_x86_64", - "manylinux2014_x86_64", - "manylinux_2_5_x86_64", - "manylinux1_x86_64", - "linux_x86_64", - } - case types.ParseArchitecture("arm64"): - return []string{ - "manylinux_2_17_aarch64", - "manylinux2014_aarch64", - "linux_aarch64", - } - case types.ParseArchitecture("arm/v7"): - return []string{ - "manylinux_2_17_armv7l", - "manylinux2014_armv7l", - "linux_armv7l", - } - case types.ParseArchitecture("arm/v6"): - return []string{ - "manylinux_2_17_armv6l", - "linux_armv6l", - } - case types.ParseArchitecture("386"): - return []string{ - "manylinux_2_17_i686", - "manylinux2014_i686", - "manylinux_2_5_i686", - "manylinux1_i686", - "linux_i686", - } - case types.ParseArchitecture("ppc64le"): - return []string{ - "manylinux_2_17_ppc64le", - "manylinux2014_ppc64le", - "linux_ppc64le", - } - case types.ParseArchitecture("s390x"): - return []string{ - "manylinux_2_17_s390x", - "manylinux2014_s390x", - "linux_s390x", - } - case types.ParseArchitecture("riscv64"): - return []string{ - "manylinux_2_17_riscv64", - "linux_riscv64", - } - default: - return []string{"any"} +// archToMachine maps OCI architecture strings to the Python/Linux machine +// string used in wheel platform tags. +var archToMachine = map[types.Architecture]string{ + types.ParseArchitecture("amd64"): "x86_64", + types.ParseArchitecture("arm64"): "aarch64", + types.ParseArchitecture("arm/v7"): "armv7l", + types.ParseArchitecture("arm/v6"): "armv6l", + types.ParseArchitecture("386"): "i686", + types.ParseArchitecture("ppc64le"): "ppc64le", + types.ParseArchitecture("s390x"): "s390x", + types.ParseArchitecture("riscv64"): "riscv64", + types.ParseArchitecture("loong64"): "loongarch64", +} + +// isLinuxPlatformTag checks whether a single platform tag (e.g. 
+// "musllinux_1_2_x86_64") targets the given machine architecture and +// is compatible with the image's libc. musl images only accept musllinux +// wheels; glibc images only accept manylinux wheels. +func isLinuxPlatformTag(tag, machine string, libc string) bool { + if !strings.HasSuffix(tag, "_"+machine) { + return false } + if tag == "linux_"+machine { + return true + } + if libc == "musl" { + return strings.HasPrefix(tag, "musllinux_") + } + return strings.HasPrefix(tag, "manylinux") +} + +// isBinaryWheel returns true if the wheel targets a specific platform +// (not pure-python "any"). +func isBinaryWheel(w wheelFileParts) bool { + return w.PlatformTag != "any" } // wheelFileParts holds the parsed components of a wheel filename per PEP 427. @@ -124,8 +101,8 @@ func parseWheelFilename(filename string) (wheelFileParts, error) { } // isCompatibleWheel checks whether a wheel file is compatible with the given -// Python version and architecture. -func isCompatibleWheel(w wheelFileParts, pythonVersion string, arch types.Architecture) bool { +// Python version, architecture, and libc. +func isCompatibleWheel(w wheelFileParts, pythonVersion string, arch types.Architecture, libc string) bool { // Check python tag compatibility if !isCompatiblePythonTag(w.PythonTag, pythonVersion) { return false @@ -137,7 +114,7 @@ func isCompatibleWheel(w wheelFileParts, pythonVersion string, arch types.Archit } // Check platform compatibility - return isCompatiblePlatform(w.PlatformTag, arch) + return isCompatiblePlatform(w.PlatformTag, arch, libc) } // isCompatiblePythonTag checks if the wheel's python tag is compatible. @@ -166,56 +143,29 @@ func isCompatibleABI(tag, pythonVersion string) bool { return false } -// isCompatiblePlatform checks if the wheel's platform tag is compatible. 
-func isCompatiblePlatform(tag string, arch types.Architecture) bool { +// isCompatiblePlatform checks if the wheel's platform tag is compatible +// with the given architecture and libc, without version limits. +func isCompatiblePlatform(tag string, arch types.Architecture, libc string) bool { if tag == "any" { return true } - compatible := platformTags(arch) + machine, ok := archToMachine[arch] + if !ok { + return false + } for t := range strings.SplitSeq(tag, ".") { - if slices.Contains(compatible, t) { + if isLinuxPlatformTag(t, machine, libc) { return true } } return false } -// wheelScore returns a priority score for the wheel. Higher is better. -// Binary wheels for the exact platform are preferred over pure-Python wheels. -func wheelScore(w wheelFileParts, pythonVersion string, arch types.Architecture) int { - score := 0 - - // Prefer exact CPython tag over generic py3 - cpTag := "cp" + strings.ReplaceAll(pythonVersion, ".", "") - for t := range strings.SplitSeq(w.PythonTag, ".") { - if t == cpTag { - score += 100 - break - } - } - - // Prefer specific ABI over none/abi3 - for t := range strings.SplitSeq(w.ABITag, ".") { - switch t { - case cpTag: - score += 50 - case "abi3": - score += 25 - } - } - - // Prefer specific platform over any - if w.PlatformTag != "any" { - platTags := platformTags(arch) - for i, pt := range platTags { - for pp := range strings.SplitSeq(w.PlatformTag, ".") { - if pp == pt { - // More specific platforms (earlier in list) get higher scores - score += 10 * (len(platTags) - i) - } - } - } +// isBetterWheel returns true if candidate is a better choice than current. +// Prefers binary wheels over pure-python. 
+func isBetterWheel(current, candidate wheelFileParts) bool { + if !isBinaryWheel(current) && isBinaryWheel(candidate) { + return true } - - return score + return false } diff --git a/pkg/ecosystem/python/platform_test.go b/pkg/ecosystem/python/platform_test.go index 275873a88..466d92452 100644 --- a/pkg/ecosystem/python/platform_test.go +++ b/pkg/ecosystem/python/platform_test.go @@ -20,51 +20,60 @@ import ( "chainguard.dev/apko/pkg/build/types" ) -func TestPlatformTags(t *testing.T) { +func TestIsLinuxPlatformTag(t *testing.T) { tests := []struct { - arch string - wantLen int - wantAny string // At least one tag should contain this + tag, machine string + libc string + want bool }{ - {"amd64", 5, "x86_64"}, - {"arm64", 3, "aarch64"}, - {"arm/v7", 3, "armv7l"}, - {"386", 5, "i686"}, - {"ppc64le", 3, "ppc64le"}, - {"s390x", 3, "s390x"}, + // musl accepts musllinux, rejects manylinux + {"musllinux_1_2_x86_64", "x86_64", "musl", true}, + {"manylinux_2_17_x86_64", "x86_64", "musl", false}, + // glibc accepts manylinux, rejects musllinux + {"manylinux_2_17_x86_64", "x86_64", "glibc", true}, + {"manylinux_2_99_x86_64", "x86_64", "glibc", true}, // no version ceiling + {"manylinux2014_x86_64", "x86_64", "glibc", true}, // legacy alias + {"manylinux1_i686", "i686", "glibc", true}, // legacy alias + {"musllinux_1_2_x86_64", "x86_64", "glibc", false}, + // linux_ fallback works for both + {"linux_x86_64", "x86_64", "musl", true}, + {"linux_x86_64", "x86_64", "glibc", true}, + // wrong machine or non-linux + {"musllinux_1_2_aarch64", "x86_64", "musl", false}, + {"macosx_10_9_x86_64", "x86_64", "glibc", false}, + {"any", "x86_64", "glibc", false}, } - for _, tt := range tests { - t.Run(tt.arch, func(t *testing.T) { - tags := platformTags(types.ParseArchitecture(tt.arch)) - if len(tags) != tt.wantLen { - t.Errorf("platformTags(%s) returned %d tags, want %d", tt.arch, len(tags), tt.wantLen) - } - found := false - for _, tag := range tags { - if contains(tag, tt.wantAny) { - 
found = true - break - } - } - if !found { - t.Errorf("platformTags(%s) = %v, none contain %q", tt.arch, tags, tt.wantAny) + t.Run(tt.tag, func(t *testing.T) { + if got := isLinuxPlatformTag(tt.tag, tt.machine, tt.libc); got != tt.want { + t.Errorf("isLinuxPlatformTag(%q, %q, %v) = %v, want %v", tt.tag, tt.machine, tt.libc, got, tt.want) } }) } } -func contains(s, substr string) bool { - return len(s) >= len(substr) && (s == substr || len(s) > 0 && containsSubstr(s, substr)) +func TestIsBetterWheel(t *testing.T) { + pure := wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"} + binary := wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"} + + if !isBetterWheel(pure, binary) { + t.Error("binary wheel should be better than pure python") + } + if isBetterWheel(binary, pure) { + t.Error("pure python should not be better than binary") + } + if isBetterWheel(binary, binary) { + t.Error("identical wheels should not be better") + } } -func containsSubstr(s, substr string) bool { - for i := 0; i <= len(s)-len(substr); i++ { - if s[i:i+len(substr)] == substr { - return true +func TestArchToMachine(t *testing.T) { + // All standard architectures should have a mapping. 
+ for _, arch := range []string{"amd64", "arm64", "arm/v7", "arm/v6", "386", "ppc64le", "s390x", "riscv64", "loong64"} { + if _, ok := archToMachine[types.ParseArchitecture(arch)]; !ok { + t.Errorf("archToMachine missing %q", arch) } } - return false } func TestParseWheelFilename(t *testing.T) { @@ -140,48 +149,59 @@ func TestIsCompatibleWheel(t *testing.T) { wheel wheelFileParts pyVer string arch string + libc string want bool }{ { - name: "pure python wheel is always compatible", + name: "pure python wheel on glibc", + wheel: wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"}, + pyVer: "3.12", arch: "amd64", libc: "glibc", want: true, + }, + { + name: "pure python wheel on musl", wheel: wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"}, - pyVer: "3.12", - arch: "amd64", - want: true, + pyVer: "3.12", arch: "amd64", libc: "musl", want: true, }, { - name: "cpython binary for matching arch", + name: "manylinux on glibc", wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"}, - pyVer: "3.12", - arch: "amd64", - want: true, + pyVer: "3.12", arch: "amd64", libc: "glibc", want: true, }, { - name: "cpython binary for wrong arch", + name: "manylinux on musl is rejected", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"}, + pyVer: "3.12", arch: "amd64", libc: "musl", want: false, + }, + { + name: "musllinux on musl", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "musllinux_1_2_x86_64"}, + pyVer: "3.12", arch: "amd64", libc: "musl", want: true, + }, + { + name: "musllinux on glibc is rejected", + wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "musllinux_1_2_x86_64"}, + pyVer: "3.12", arch: "amd64", libc: "glibc", want: false, + }, + { + name: "wrong arch", wheel: wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_aarch64"}, - pyVer: "3.12", - arch: "amd64", - want: 
false, + pyVer: "3.12", arch: "amd64", libc: "glibc", want: false, }, { name: "wrong python version", wheel: wheelFileParts{PythonTag: "cp311", ABITag: "cp311", PlatformTag: "any"}, - pyVer: "3.12", - arch: "amd64", - want: false, + pyVer: "3.12", arch: "amd64", libc: "glibc", want: false, }, { - name: "abi3 is compatible", + name: "abi3 on glibc", wheel: wheelFileParts{PythonTag: "cp312", ABITag: "abi3", PlatformTag: "manylinux_2_17_x86_64"}, - pyVer: "3.12", - arch: "amd64", - want: true, + pyVer: "3.12", arch: "amd64", libc: "glibc", want: true, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := isCompatibleWheel(tt.wheel, tt.pyVer, types.ParseArchitecture(tt.arch)) + got := isCompatibleWheel(tt.wheel, tt.pyVer, types.ParseArchitecture(tt.arch), tt.libc) if got != tt.want { t.Errorf("isCompatibleWheel() = %v, want %v", got, tt.want) } @@ -189,14 +209,18 @@ func TestIsCompatibleWheel(t *testing.T) { } } -func TestWheelScore(t *testing.T) { - pureWheel := wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"} - binaryWheel := wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"} - - pureScore := wheelScore(pureWheel, "3.12", types.ParseArchitecture("amd64")) - binaryScore := wheelScore(binaryWheel, "3.12", types.ParseArchitecture("amd64")) +func TestWheelSelection(t *testing.T) { + // When both pure and binary wheels are compatible, binary wins. 
+ pure := wheelFileParts{PythonTag: "py3", ABITag: "none", PlatformTag: "any"} + binary := wheelFileParts{PythonTag: "cp312", ABITag: "cp312", PlatformTag: "manylinux_2_17_x86_64"} - if binaryScore <= pureScore { - t.Errorf("binary wheel score (%d) should be higher than pure wheel score (%d)", binaryScore, pureScore) + if !isCompatibleWheel(pure, "3.12", types.ParseArchitecture("amd64"), "glibc") { + t.Fatal("pure wheel should be compatible") + } + if !isCompatibleWheel(binary, "3.12", types.ParseArchitecture("amd64"), "glibc") { + t.Fatal("binary wheel should be compatible on glibc") + } + if !isBetterWheel(pure, binary) { + t.Error("binary should be preferred over pure") } } diff --git a/pkg/ecosystem/python/python.go b/pkg/ecosystem/python/python.go index 6c6a197bf..2c5451c8b 100644 --- a/pkg/ecosystem/python/python.go +++ b/pkg/ecosystem/python/python.go @@ -55,7 +55,7 @@ type installer struct{} func (i *installer) Name() string { return "python" } -func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture, a auth.Authenticator) ([]ecosystem.ResolvedPackage, error) { +func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, arch types.Architecture, libc string, a auth.Authenticator) ([]ecosystem.ResolvedPackage, error) { if len(config.Packages) == 0 { return nil, nil } @@ -75,7 +75,7 @@ func (i *installer) Resolve(ctx context.Context, config types.EcosystemConfig, a return nil, fmt.Errorf("python_version is required in ecosystem python config") } - return resolvePackages(ctx, specs, indexes, pythonVersion, arch, a) + return resolvePackages(ctx, specs, indexes, pythonVersion, arch, libc, a) } func (i *installer) Install(ctx context.Context, fsys apkfs.FullFS, packages []ecosystem.ResolvedPackage, config types.EcosystemConfig, a auth.Authenticator) (map[string]string, error) { diff --git a/pkg/ecosystem/python/python_test.go b/pkg/ecosystem/python/python_test.go index 9703819f9..386f0b0ab 100644 
--- a/pkg/ecosystem/python/python_test.go +++ b/pkg/ecosystem/python/python_test.go @@ -15,6 +15,7 @@ package python import ( + "strings" "testing" apkfs "chainguard.dev/apko/pkg/apk/fs" @@ -38,10 +39,10 @@ func TestCreateVenv(t *testing.T) { t.Fatalf("reading pyvenv.cfg: %v", err) } cfg := string(data) - if !contains(cfg, "home = /usr/bin") { + if !strings.Contains(cfg, "home = /usr/bin") { t.Errorf("pyvenv.cfg missing home, got: %q", cfg) } - if !contains(cfg, "version = 3.12") { + if !strings.Contains(cfg, "version = 3.12") { t.Errorf("pyvenv.cfg missing version, got: %q", cfg) } diff --git a/pkg/ecosystem/python/resolve.go b/pkg/ecosystem/python/resolve.go index 0b2f72fd3..d00d9b1e6 100644 --- a/pkg/ecosystem/python/resolve.go +++ b/pkg/ecosystem/python/resolve.go @@ -162,7 +162,7 @@ type pypiVersionsJSON struct { // resolvePackages resolves package specs to specific wheel URLs, // including transitive dependencies discovered via the PyPI JSON API. -func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture, a auth.Authenticator) ([]ecosystem.ResolvedPackage, error) { +func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) ([]ecosystem.ResolvedPackage, error) { log := clog.FromContext(ctx) if len(indexes) == 0 { @@ -185,7 +185,7 @@ func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, continue } - pkg, deps, err := resolveOneWithDeps(ctx, spec, indexes, pythonVersion, arch, a) + pkg, deps, err := resolveOneWithDeps(ctx, spec, indexes, pythonVersion, arch, libc, a) if err != nil { return nil, fmt.Errorf("resolving %s: %w", spec.Name, err) } @@ -207,10 +207,10 @@ func resolvePackages(ctx context.Context, specs []packageSpec, indexes []string, // resolveOneWithDeps resolves a package and returns both the resolved package // and its transitive dependencies. 
It tries the PyPI JSON API first (which // gives us clean metadata), falling back to the Simple API for non-PyPI indexes. -func resolveOneWithDeps(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { +func resolveOneWithDeps(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { // Try PyPI JSON API first — it gives us metadata + wheel URLs in one call if usesDefaultPyPI(indexes) { - pkg, deps, err := resolveViaJSON(ctx, spec, pythonVersion, arch, a) + pkg, deps, err := resolveViaJSON(ctx, spec, pythonVersion, arch, libc, a) if err == nil { return pkg, deps, nil } @@ -218,7 +218,7 @@ func resolveOneWithDeps(ctx context.Context, spec packageSpec, indexes []string, } // Fall back to Simple API (downloads wheel to extract Requires-Dist for deps) - pkg, deps, err := resolveViaSimple(ctx, spec, indexes, pythonVersion, arch, a) + pkg, deps, err := resolveViaSimple(ctx, spec, indexes, pythonVersion, arch, libc, a) if err != nil { return ecosystem.ResolvedPackage{}, nil, err } @@ -239,12 +239,12 @@ func usesDefaultPyPI(indexes []string) bool { // resolveViaJSON resolves a package using the PyPI JSON API. // Returns the resolved package and its parsed Requires-Dist as deps. 
-func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, arch types.Architecture, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { +func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { name := normalizeName(spec.Name) // If we have an exact version, fetch that directly if spec.Operator == "==" { - return resolveJSONVersion(ctx, name, spec.Name, spec.Version, pythonVersion, arch, a) + return resolveJSONVersion(ctx, name, spec.Name, spec.Version, pythonVersion, arch, libc, a) } // Otherwise, list all versions and pick the best @@ -277,11 +277,11 @@ func resolveViaJSON(ctx context.Context, spec packageSpec, pythonVersion string, return ecosystem.ResolvedPackage{}, nil, fmt.Errorf("no matching version for %s%s%s", spec.Name, spec.Operator, spec.Version) } - return resolveJSONVersion(ctx, name, spec.Name, bestVersion, pythonVersion, arch, a) + return resolveJSONVersion(ctx, name, spec.Name, bestVersion, pythonVersion, arch, libc, a) } // resolveJSONVersion fetches a specific version from the PyPI JSON API. 
-func resolveJSONVersion(ctx context.Context, normalizedName, originalName, version, pythonVersion string, arch types.Architecture, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { +func resolveJSONVersion(ctx context.Context, normalizedName, originalName, version, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { versionURL := pypiJSONBase() + normalizedName + "/" + version + "/json" data, err := httpGet(ctx, versionURL, a) if err != nil { @@ -294,7 +294,7 @@ func resolveJSONVersion(ctx context.Context, normalizedName, originalName, versi } // Find the best wheel from the URLs - wheelURL, checksum, err := selectBestWheelFromJSON(pkgResp.URLs, pythonVersion, arch) + wheelURL, checksum, err := selectBestWheelFromJSON(pkgResp.URLs, pythonVersion, arch, libc) if err != nil { return ecosystem.ResolvedPackage{}, nil, err } @@ -319,10 +319,9 @@ func resolveJSONVersion(ctx context.Context, normalizedName, originalName, versi } // selectBestWheelFromJSON picks the best compatible wheel from PyPI JSON API URLs. 
-func selectBestWheelFromJSON(urls []pypiURL, pythonVersion string, arch types.Architecture) (string, string, error) { +func selectBestWheelFromJSON(urls []pypiURL, pythonVersion string, arch types.Architecture, libc string) (string, string, error) { var bestURL *pypiURL var bestParts wheelFileParts - bestScore := -1 for i, u := range urls { if u.PackageType != "bdist_wheel" { @@ -332,16 +331,13 @@ func selectBestWheelFromJSON(urls []pypiURL, pythonVersion string, arch types.Ar if err != nil { continue } - if !isCompatibleWheel(parts, pythonVersion, arch) { + if !isCompatibleWheel(parts, pythonVersion, arch, libc) { continue } - score := wheelScore(parts, pythonVersion, arch) - if bestURL == nil || score > bestScore { + if bestURL == nil || isBetterWheel(bestParts, parts) { bestURL = &urls[i] bestParts = parts - _ = bestParts // used for future scoring - bestScore = score } } @@ -461,7 +457,7 @@ func parseSimpleIndex(body string, baseURL string) []wheelLink { // resolveViaSimple resolves a package using the PEP 503 Simple API. // After finding the best wheel, it downloads it to extract Requires-Dist // metadata for transitive dependency resolution. 
-func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { +func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, pythonVersion string, arch types.Architecture, libc string, a auth.Authenticator) (ecosystem.ResolvedPackage, []packageSpec, error) { name := normalizeName(spec.Name) for _, index := range indexes { @@ -478,7 +474,7 @@ func resolveViaSimple(ctx context.Context, spec packageSpec, indexes []string, p continue } - best, err := selectBestWheel(links, spec, pythonVersion, arch) + best, err := selectBestWheel(links, spec, pythonVersion, arch, libc) if err != nil { continue } @@ -563,28 +559,25 @@ type selectedWheel struct { } // selectBestWheel selects the best compatible wheel from Simple API links. -func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, arch types.Architecture) (selectedWheel, error) { +func selectBestWheel(links []wheelLink, spec packageSpec, pythonVersion string, arch types.Architecture, libc string) (selectedWheel, error) { var bestLink *wheelLink var bestParts wheelFileParts - bestScore := -1 for i, link := range links { parts, err := parseWheelFilename(link.Filename) if err != nil { continue } - if !isCompatibleWheel(parts, pythonVersion, arch) { + if !isCompatibleWheel(parts, pythonVersion, arch, libc) { continue } if !matchesVersionSpec(parts.Version, spec) { continue } - score := wheelScore(parts, pythonVersion, arch) - if bestLink == nil || compareVersions(parts.Version, bestParts.Version) > 0 || (compareVersions(parts.Version, bestParts.Version) == 0 && score > bestScore) { + if bestLink == nil || compareVersions(parts.Version, bestParts.Version) > 0 || (compareVersions(parts.Version, bestParts.Version) == 0 && isBetterWheel(bestParts, parts)) { bestLink = &links[i] bestParts = parts - bestScore = score } } diff --git 
a/pkg/ecosystem/python/resolve_test.go b/pkg/ecosystem/python/resolve_test.go index 4fa17dff1..3aa13cec8 100644 --- a/pkg/ecosystem/python/resolve_test.go +++ b/pkg/ecosystem/python/resolve_test.go @@ -325,7 +325,7 @@ func TestResolveWithMockJSON(t *testing.T) { pypiJSONBaseOverride = server.URL + "/pypi/" specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} - resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), nil) + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), "glibc", nil) if err != nil { t.Fatalf("resolvePackages() error: %v", err) } @@ -409,7 +409,7 @@ func TestResolveTransitiveDeps(t *testing.T) { defer func() { pypiJSONBaseOverride = "" }() specs := []packageSpec{{Name: "flask", Operator: "==", Version: "3.0.0"}} - resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), nil) + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), "glibc", nil) if err != nil { t.Fatalf("resolvePackages() error: %v", err) } @@ -457,7 +457,7 @@ func TestResolveSimpleApiFallback(t *testing.T) { specs := []packageSpec{{Name: "mypackage", Operator: "==", Version: "1.0.0"}} // Use a non-pypi index so it doesn't try the JSON API - resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), nil) + resolved, err := resolvePackages(context.Background(), specs, []string{server.URL + "/simple/"}, "3.12", types.ParseArchitecture("amd64"), "glibc", nil) if err != nil { t.Fatalf("resolvePackages() error: %v", err) }