diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 16d9cf6b..a2d85153 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -55,6 +55,11 @@ jobs: set -euxo pipefail sudo apt-get update sudo apt-get purge firefox passt + # Microsoft Edge (preinstalled on the runner image) ships two AppArmor + # profiles attached to the same binary (/etc/apparmor.d/msedge and + # /etc/apparmor.d/microsoft-edge-stable). That duplicate attachment makes + # aa-disable abort while parsing all profiles, so remove them first. + sudo rm -f /etc/apparmor.d/msedge /etc/apparmor.d/microsoft-edge-stable sudo systemctl reload apparmor.service sudo apt-get install apparmor-utils sudo aa-disable /usr/sbin/unix_chkpwd @@ -90,6 +95,11 @@ jobs: set -euxo pipefail sudo apt-get update sudo apt-get purge firefox passt + # Microsoft Edge (preinstalled on the runner image) ships two AppArmor + # profiles attached to the same binary (/etc/apparmor.d/msedge and + # /etc/apparmor.d/microsoft-edge-stable). That duplicate attachment makes + # aa-disable abort while parsing all profiles, so remove them first. + sudo rm -f /etc/apparmor.d/msedge /etc/apparmor.d/microsoft-edge-stable sudo systemctl reload apparmor.service sudo apt-get install apparmor-utils sudo aa-disable /usr/sbin/unix_chkpwd diff --git a/docs/user/reference/config/overlays.md b/docs/user/reference/config/overlays.md index 55061118..cc41528e 100644 --- a/docs/user/reference/config/overlays.md +++ b/docs/user/reference/config/overlays.md @@ -47,20 +47,44 @@ successfully makes a replacement to at least one matching file. | `file-remove` | Removes a file | `file` | Glob pattern for files to remove | | `file-rename` | Renames a file within the same directory | `file`, `replacement` | Name of file to rename | +> **Tip:** `file-remove` and `file-search-replace` can also operate inside a source archive by +> setting the `archive` field — see [Archive Overlays](#archive-overlays). 
+ +### Archive Overlays + +A `file-remove` or `file-search-replace` overlay can modify files **inside** a source archive +instead of loose files in the sources tree. Set the `archive` field to scope it to that archive. +The archive is extracted into a temporary directory, the matching files are modified with the +same machinery as loose-file overlays, and the archive is repacked with its original compression +format. Extraction and repacking are handled natively. + +> **Note:** Archive overlays are batched per archive — all overlays targeting the same archive +> share a single extract/modify/repack cycle — and the `sources` file is rehashed afterward to +> reflect the repacked archive. They are always applied before spec and loose-file overlays, regardless of their position in the overlay list, and are skipped with a warning when source downloads are skipped (`--skip-sources`). + +> **Extraction root:** The `file` glob in an archive overlay is interpreted relative to the archive's extraction root. By default the root is inferred: if the archive unpacks to a single top-level directory (the conventional `%{name}-%{version}` layout) that directory is used; otherwise the archive root is used. Set `archive-root` to override this — the equivalent of rpmbuild's `%setup -n` — when an archive's top-level directory does not follow that convention. 
+ +| Type | Description | Required Fields | +|------|-------------|-----------------| +| `file-remove` + `archive` | Removes file(s) matching a glob pattern from inside an archive | `archive`, `file` | +| `file-search-replace` + `archive` | Regex-based search and replace on file(s) inside an archive | `archive`, `file`, `regex` | + ## Field Reference | Field | TOML Key | Description | Used By | |-------|----------|-------------|---------| | Type | `type` | **Required.** The overlay type to apply | All overlays | | Description | `description` | Human-readable explanation documenting the need for the change; helps identify overlays in error messages | All (optional) | +| Archive | `archive` | The source archive filename to scope an overlay to (must be a basename, not a path). When set, the overlay operates on files inside that archive. | `file-remove`, `file-search-replace` (optional) | +| Archive root | `archive-root` | Top-level directory inside the archive to treat as the extraction root (mirrors `%setup -n`); inferred when unset. Must be a local relative path (no `..` or absolute paths). When multiple overlays target the same archive, any that set this must agree. | archive-scoped `file-remove` / `file-search-replace` (optional) | | Tag | `tag` | The spec tag name (e.g., `BuildRequires`, `Requires`, `Version`) | `spec-add-tag`, `spec-insert-tag`, `spec-set-tag`, `spec-update-tag`, `spec-remove-tag` | -| Value | `value` | The tag value to set, or value to match for removal | `spec-add-tag`, `spec-insert-tag`, `spec-set-tag`, `spec-update-tag`, `spec-remove-tag` (optional for matching) | +| Value | `value` | The tag value to set, or value to match for removal. 
| `spec-add-tag`, `spec-insert-tag`, `spec-set-tag`, `spec-update-tag`, `spec-remove-tag` (optional for matching) | | Section | `section` | The spec section to target (e.g., `%build`, `%install`, `%files`, `%description`) | `spec-prepend-lines`, `spec-append-lines`, `spec-search-replace` (optional), `spec-remove-section` | | Package | `package` | The sub-package name for multi-package specs; omit to target the main package | All spec overlays (optional, except `spec-remove-subpackage` which **requires** it) | | Regex | `regex` | Regular expression pattern to match | `spec-search-replace`, `file-search-replace` | | Replacement | `replacement` | Literal replacement text; capture group references like `$1` are **not** expanded. Omit or leave empty to delete matched text. | `spec-search-replace`, `file-search-replace`, `file-rename` | | Lines | `lines` | Array of text lines to insert | `spec-prepend-lines`, `spec-append-lines`, `file-prepend-lines` | -| File | `file` | The name of the non-spec file to modify or add | `file-prepend-lines`, `file-search-replace`, `file-add`, `file-remove`, `file-rename`, `patch-add` (optional), `patch-remove` | +| File | `file` | The name of the non-spec file to modify or add, or a glob pattern. For an archive-scoped overlay, it is matched against the archive's extracted contents. | `file-prepend-lines`, `file-search-replace`, `file-add`, `file-remove`, `file-rename`, `patch-add` (optional), `patch-remove` | | Source | `source` | Path to source file for `file-add` and `patch-add`; relative paths are relative to the config file | `file-add`, `patch-add` | > **Note:** For `file-rename`, the `replacement` field is a **filename only** (not a path). The file is renamed within its current directory. @@ -274,6 +298,37 @@ description = "Remove CVE patches that are now upstream" > `PatchN` tags. Macro-based tag numbering (e.g., `Patch%{n}`) is not expanded and may > conflict with auto-assigned numbers. 
+### Removing a File from an Archive + +Set the `archive` field on a `file-remove` overlay to delete files matching a glob pattern from +inside a source archive. The archive is extracted, matching files are removed, and the archive is +repacked. + +```toml +[[components.mypackage.overlays]] +type = "file-remove" +archive = "mypackage-1.0.tar.gz" +file = "vendor/**" +description = "Remove all bundled vendor files" +``` + +> **Tip:** Without the `archive` field, the same `file-remove` overlay removes a loose file from +> the sources tree instead. The `archive` field is the only thing that scopes it to an archive. + +### Search and Replace Inside an Archive + +Set the `archive` field on a `file-search-replace` overlay to rewrite content inside an archive: + +```toml +[[components.mypackage.overlays]] +type = "file-search-replace" +archive = "mypackage-1.0.tar.xz" +file = "configure.ac" +regex = "AC_CHECK_LIB\\(old_lib" +replacement = "AC_CHECK_LIB(new_lib" +description = "Update library reference in configure script" +``` + ### Removing a Section The `spec-remove-section` overlay removes an entire section from the spec, including its diff --git a/internal/app/azldev/cmds/component/preparesources.go b/internal/app/azldev/cmds/component/preparesources.go index 2f0ffcaa..3dc7d687 100644 --- a/internal/app/azldev/cmds/component/preparesources.go +++ b/internal/app/azldev/cmds/component/preparesources.go @@ -138,13 +138,7 @@ func PrepareComponentSources(env *azldev.Env, options *PrepareSourcesOptions) er ) } - if options.AllowNoHashes { - preparerOpts = append(preparerOpts, sources.WithAllowNoHashes()) - } - - if options.SkipSources { - preparerOpts = append(preparerOpts, sources.WithSkipLookaside()) - } + preparerOpts = appendPrepareSourcesOptions(env, preparerOpts, options, distro) preparer, err := sources.NewPreparer(sourceManager, env.FS(), env, env, preparerOpts...) 
if err != nil {
@@ -194,3 +188,23 @@ func CheckOutputDir(env *azldev.Env, options *PrepareSourcesOptions) error {
 		"use --force to delete and recreate it",
 		options.OutputDir)
 }
+
+// appendPrepareSourcesOptions appends the preparer options selected by the
+// AllowNoHashes and SkipSources flags. Extracted from [PrepareComponentSources]
+// to keep cyclomatic complexity within limits; env and distro are currently unused.
+func appendPrepareSourcesOptions(
+	_ *azldev.Env,
+	opts []sources.PreparerOption,
+	options *PrepareSourcesOptions,
+	_ sourceproviders.ResolvedDistro,
+) []sources.PreparerOption {
+	if options.AllowNoHashes {
+		opts = append(opts, sources.WithAllowNoHashes())
+	}
+
+	if options.SkipSources {
+		opts = append(opts, sources.WithSkipLookaside())
+	}
+
+	return opts
+}
diff --git a/internal/app/azldev/core/sources/archiveoverlays.go b/internal/app/azldev/core/sources/archiveoverlays.go
new file mode 100644
index 00000000..f54711ae
--- /dev/null
+++ b/internal/app/azldev/core/sources/archiveoverlays.go
@@ -0,0 +1,268 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package sources
+
+import (
+	"fmt"
+	"log/slog"
+	"os"
+	"path/filepath"
+
+	"github.com/microsoft/azure-linux-dev-tools/internal/global/opctx"
+	"github.com/microsoft/azure-linux-dev-tools/internal/projectconfig"
+	"github.com/microsoft/azure-linux-dev-tools/internal/utils/archive"
+	"github.com/microsoft/azure-linux-dev-tools/internal/utils/rootfs"
+)
+
+// applyArchiveOverlays groups archive overlays by target archive and processes
+// them in order. Multiple overlays targeting the same archive are batched into
+// a single extract/modify/repack cycle. File modifications inside the archive
+// reuse the same machinery as loose-file overlays ([applyNonSpecOverlay]).
+func applyArchiveOverlays(
+	dryRunnable opctx.DryRunnable,
+	eventListener opctx.EventListener,
+	sourcesDirPath string,
+	overlays []projectconfig.ComponentOverlay,
+) error {
+	groups, err := groupOverlaysByArchive(overlays)
+	if err != nil {
+		return err
+	}
+
+	if len(groups) == 0 {
+		return nil
+	}
+
+	operationCount := 0
+	for _, group := range groups {
+		operationCount += len(group.overlays)
+	}
+
+	event := eventListener.StartEvent("Applying archive overlays",
+		"archives", len(groups),
+		"operations", operationCount,
+	)
+	defer event.End()
+
+	for _, group := range groups {
+		if err := processArchive(dryRunnable, sourcesDirPath, group); err != nil {
+			return fmt.Errorf("archive overlay failed for %#q:\n%w", group.archive, err)
+		}
+	}
+
+	return nil
+}
+
+// archiveGroup holds overlays targeting the same archive, preserving order.
+type archiveGroup struct {
+	archive  string
+	root     string
+	overlays []projectconfig.ComponentOverlay
+}
+
+// groupOverlaysByArchive groups archive overlays by their
+// [projectconfig.ComponentOverlay.Archive] field, preserving insertion order
+// within each group and across groups. Non-archive overlays are silently skipped.
+//
+// The optional [projectconfig.ComponentOverlay.ArchiveRoot] override (mirroring
+// rpmbuild's `%setup -n`) is reconciled per archive: all overlays targeting the
+// same archive that set it must agree, otherwise the configuration is ambiguous
+// and an error is returned.
+func groupOverlaysByArchive(overlays []projectconfig.ComponentOverlay) ([]archiveGroup, error) {
+	orderMap := make(map[string]int)
+
+	var groups []archiveGroup
+
+	for _, overlay := range overlays {
+		if !overlay.ModifiesArchive() {
+			continue
+		}
+
+		idx, exists := orderMap[overlay.Archive]
+		if !exists {
+			idx = len(groups)
+			orderMap[overlay.Archive] = idx
+
+			groups = append(groups, archiveGroup{archive: overlay.Archive})
+		}
+
+		if overlay.ArchiveRoot != "" {
+			if groups[idx].root != "" && groups[idx].root != overlay.ArchiveRoot {
+				return nil, fmt.Errorf(
+					"conflicting %#q overrides for archive %#q: %#q vs %#q",
+					"archive-root", overlay.Archive, groups[idx].root, overlay.ArchiveRoot,
+				)
+			}
+
+			groups[idx].root = overlay.ArchiveRoot
+		}
+
+		groups[idx].overlays = append(groups[idx].overlays, overlay)
+	}
+
+	return groups, nil
+}
+
+// processArchive extracts an archive to a temp directory, applies all overlays,
+// and deterministically repacks it in-place with the original compression.
+func processArchive(
+	dryRunnable opctx.DryRunnable,
+	sourcesDirPath string,
+	group archiveGroup,
+) error {
+	archivePath := filepath.Join(sourcesDirPath, group.archive)
+
+	// Create a temporary directory for extraction directly on the real filesystem.
+	// The [archive] package operates exclusively through OS primitives ([os.Root],
+	// os.*), so the work directory must be a genuine on-disk path regardless of the
+	// injected FS implementation. Using os.MkdirTemp here (instead of the injected
+	// FS) makes that requirement explicit and keeps the path valid even when fs is
+	// an in-memory or otherwise non-OS-backed FS (e.g., in tests or alternate runners).
+	workDir, err := os.MkdirTemp("", "archive-overlay-")
+	if err != nil {
+		return fmt.Errorf("creating temp directory:\n%w", err)
+	}
+
+	defer func() {
+		if removeErr := os.RemoveAll(workDir); removeErr != nil {
+			slog.Warn("Failed to clean up archive work directory", "error", removeErr)
+		}
+	}()
+
+	// Extract the archive; compression is inferred from the filename extension.
+	if err := archive.ExtractAuto(archivePath, workDir); err != nil {
+		return fmt.Errorf("extracting archive:\n%w", err)
+	}
+
+	// Determine the root of the extracted content. Most source archives have
+	// a single top-level directory (e.g., "pkg-1.0/"); group.root overrides this
+	// inference when set (mirrors rpmbuild's `%setup -n`).
+	extractRoot, err := resolveExtractRoot(workDir, group.root)
+	if err != nil {
+		return fmt.Errorf("resolving extract root:\n%w", err)
+	}
+
+	// Confine an FS to the extract root so file overlays reuse the same machinery
+	// as loose-file overlays. The extracted tree is always on the real filesystem
+	// (written by the [archive] package), so root it on an OS-backed FS regardless
+	// of the injected fs implementation.
+	extractFS, err := rootfs.New(extractRoot)
+	if err != nil {
+		return fmt.Errorf("confining FS to extract root:\n%w", err)
+	}
+
+	defer func() {
+		if closeErr := extractFS.Close(); closeErr != nil {
+			slog.Warn("Failed to close extract-root FS", "error", closeErr)
+		}
+	}()
+
+	// Apply each overlay operation in order. Archive overlays are restricted to
+	// file-remove / file-search-replace (see [projectconfig.ComponentOverlay.ModifiesArchive]),
+	// which operate solely on the destination tree, so the extract-root FS is passed as
+	// both the source and destination FS — there is no component-source FS to read from.
+	for _, overlay := range group.overlays {
+		if err := applyNonSpecOverlay(dryRunnable, extractFS, extractFS, overlay); err != nil {
+			return fmt.Errorf("applying %#q operation:\n%w", overlay.Type, err)
+		}
+	}
+
+	// Deterministically repack the archive in-place, reusing the original compression.
+	if err := archive.CreateDeterministicArchiveAuto(archivePath, workDir); err != nil {
+		return fmt.Errorf("repacking archive:\n%w", err)
+	}
+
+	slog.Info("Archive overlay applied", "archive", group.archive)
+
+	return nil
+}
+
+// resolveExtractRoot returns the effective root of an extracted archive.
+// When rootOverride is set (the `%setup -n` equivalent), the named subdirectory
+// of workDir is used; it must be a local path that exists as a directory and,
+// once symlinks are resolved, still lies inside workDir. When rootOverride is
+// empty, the root is inferred: if workDir contains exactly one entry and that
+// entry is a directory (the common case for source archives like "pkg-1.0/"),
+// that subdirectory is returned; otherwise workDir itself is returned.
+func resolveExtractRoot(workDir, rootOverride string) (string, error) {
+	if rootOverride != "" {
+		// Defense in depth: validation already rejects non-local overrides, but
+		// re-check before joining so a malformed value can never escape workDir.
+		if !filepath.IsLocal(rootOverride) {
+			return "", fmt.Errorf("archive root %#q is not a local path", rootOverride)
+		}
+
+		target := filepath.Join(workDir, rootOverride)
+
+		info, err := os.Stat(target)
+		if err != nil {
+			return "", fmt.Errorf("archive root %#q not found after extraction:\n%w", rootOverride, err)
+		}
+
+		if !info.IsDir() {
+			return "", fmt.Errorf("archive root %#q is not a directory", rootOverride)
+		}
+
+		// filepath.IsLocal is purely lexical and os.Stat follows symlinks, so a
+		// symlink shipped inside the archive could still redirect the root
+		// outside workDir. Reject the override unless it resolves inside workDir.
+		if err := ensureWithinDir(workDir, target); err != nil {
+			return "", fmt.Errorf("archive root %#q escapes the extraction directory:\n%w", rootOverride, err)
+		}
+
+		return target, nil
+	}
+
+	entries, err := os.ReadDir(workDir)
+	if err != nil {
+		return "", fmt.Errorf("reading extracted directory:\n%w", err)
+	}
+
+	if len(entries) == 1 && entries[0].IsDir() {
+		return filepath.Join(workDir, entries[0].Name()), nil
+	}
+
+	return workDir, nil
+}
+
+// ensureWithinDir returns an error unless target, after resolving symlinks,
+// is baseDir itself or a path beneath it. baseDir is resolved as well so that
+// a temp directory reached through a symlink does not cause a false rejection.
+// Both paths must exist, because [filepath.EvalSymlinks] fails otherwise.
+func ensureWithinDir(baseDir, target string) error {
+	resolvedBase, err := filepath.EvalSymlinks(baseDir)
+	if err != nil {
+		return fmt.Errorf("resolving %#q:\n%w", baseDir, err)
+	}
+
+	resolvedTarget, err := filepath.EvalSymlinks(target)
+	if err != nil {
+		return fmt.Errorf("resolving %#q:\n%w", target, err)
+	}
+
+	rel, err := filepath.Rel(resolvedBase, resolvedTarget)
+	if err != nil || !filepath.IsLocal(rel) {
+		return fmt.Errorf("%#q resolves to %#q, which is outside %#q", target, resolvedTarget, resolvedBase)
+	}
+
+	return nil
+}
+
+// archiveNamesFromOverlays returns the unique archive filenames targeted by
+// archive overlays in the given overlay list. Used by [updateSourcesFile] to
+// determine which 'sources' entries need rehashing after overlay application.
+func archiveNamesFromOverlays(overlays []projectconfig.ComponentOverlay) []string {
+	seen := make(map[string]bool)
+
+	var names []string
+
+	for _, overlay := range overlays {
+		if overlay.ModifiesArchive() && !seen[overlay.Archive] {
+			seen[overlay.Archive] = true
+			names = append(names, overlay.Archive)
+		}
+	}
+
+	return names
+}
diff --git a/internal/app/azldev/core/sources/archiveoverlays_internal_test.go b/internal/app/azldev/core/sources/archiveoverlays_internal_test.go
new file mode 100644
index 00000000..1a188f8f
--- /dev/null
+++ b/internal/app/azldev/core/sources/archiveoverlays_internal_test.go
@@ -0,0 +1,238 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+package sources
+
+import (
+	"os"
+	"testing"
+
+	"github.com/microsoft/azure-linux-dev-tools/internal/global/testctx"
+	"github.com/microsoft/azure-linux-dev-tools/internal/projectconfig"
+	"github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms"
+	"github.com/microsoft/azure-linux-dev-tools/internal/utils/rootfs"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestGroupOverlaysByArchive(t *testing.T) {
+	t.Run("groups overlays by archive name preserving order", func(t *testing.T) {
+		overlays := []projectconfig.ComponentOverlay{
+			{
+				Type:     projectconfig.ComponentOverlayRemoveFile,
+				Archive:  "pkg-1.0.tar.gz",
+				Filename: "unwanted.conf",
+			},
+			{
+				Type:     projectconfig.ComponentOverlayRemoveFile,
+				Archive:  "pkg-1.0.tar.gz",
+				Filename: "config.h",
+			},
+			{
+				Type:     projectconfig.ComponentOverlayRemoveFile,
+				Archive:  "other-2.0.tar.xz",
+				Filename: "docs/*.md",
+			},
+		}
+
+		groups, err := groupOverlaysByArchive(overlays)
+		require.NoError(t, err)
+
+		require.Len(t, groups, 2)
+
+		assert.Equal(t, "pkg-1.0.tar.gz", groups[0].archive)
+		require.Len(t, groups[0].overlays, 2)
+		assert.Equal(t, "unwanted.conf", groups[0].overlays[0].Filename)
+		assert.Equal(t, "config.h", groups[0].overlays[1].Filename)
+
+		assert.Equal(t, "other-2.0.tar.xz", groups[1].archive)
+		require.Len(t, groups[1].overlays, 1)
+	})
+
+	t.Run("skips overlays that are not archive-scoped", func(t *testing.T) {
+		overlays := []projectconfig.ComponentOverlay{
+			{Type: projectconfig.ComponentOverlaySetSpecTag, Tag: "Version", Value: "1.0"},
+			{Type: projectconfig.ComponentOverlayRemoveFile, Archive: "pkg.tar.gz", Filename: "f"},
+			// Plain (non-archive) file overlay: no Archive set, so it must be skipped.
+			{Type: projectconfig.ComponentOverlayRemoveFile, Filename: "loose.txt"},
+			{Type: projectconfig.ComponentOverlayAddFile, Filename: "new.txt", Source: "src"},
+		}
+
+		groups, err := groupOverlaysByArchive(overlays)
+		require.NoError(t, err)
+
+		require.Len(t, groups, 1)
+		assert.Equal(t, "pkg.tar.gz", groups[0].archive)
+		require.Len(t, groups[0].overlays, 1)
+	})
+
+	t.Run("reconciles matching archive-root overrides", func(t *testing.T) {
+		overlays := []projectconfig.ComponentOverlay{
+			{
+				Type:        projectconfig.ComponentOverlayRemoveFile,
+				Archive:     "pkg-1.0.tar.gz",
+				ArchiveRoot: "custom-root",
+				Filename:    "a.conf",
+			},
+			{
+				Type:     projectconfig.ComponentOverlayRemoveFile,
+				Archive:  "pkg-1.0.tar.gz",
+				Filename: "b.conf",
+			},
+		}
+
+		groups, err := groupOverlaysByArchive(overlays)
+		require.NoError(t, err)
+
+		require.Len(t, groups, 1)
+		assert.Equal(t, "custom-root", groups[0].root)
+	})
+
+	t.Run("errors on conflicting archive-root overrides", func(t *testing.T) {
+		overlays := []projectconfig.ComponentOverlay{
+			{
+				Type:        projectconfig.ComponentOverlayRemoveFile,
+				Archive:     "pkg-1.0.tar.gz",
+				ArchiveRoot: "root-a",
+				Filename:    "a.conf",
+			},
+			{
+				Type:        projectconfig.ComponentOverlayRemoveFile,
+				Archive:     "pkg-1.0.tar.gz",
+				ArchiveRoot: "root-b",
+				Filename:    "b.conf",
+			},
+		}
+
+		_, err := groupOverlaysByArchive(overlays)
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "conflicting")
+	})
+}
+
+func TestResolveExtractRoot(t *testing.T) {
+	t.Run("infers single top-level directory", func(t *testing.T) {
+		workDir := t.TempDir()
+		require.NoError(t, os.MkdirAll(workDir+"/pkg-1.0", 0o755))
+
+		root, err := resolveExtractRoot(workDir, "")
+		require.NoError(t, err)
+		assert.Equal(t, workDir+"/pkg-1.0", root)
+	})
+
+	t.Run("falls back to workDir for multiple top-level entries", func(t *testing.T) {
+		workDir := t.TempDir()
+		require.NoError(t, os.MkdirAll(workDir+"/dirA", 0o755))
+		require.NoError(t, os.WriteFile(workDir+"/loose.txt", nil, fileperms.PrivateFile))
+
+		root, err := resolveExtractRoot(workDir, "")
+		require.NoError(t, err)
+		assert.Equal(t, workDir, root)
+	})
+
+	t.Run("override selects named subdirectory", func(t *testing.T) {
+		workDir := t.TempDir()
+		// Two top-level dirs so the heuristic would not pick one.
+		require.NoError(t, os.MkdirAll(workDir+"/dirA", 0o755))
+		require.NoError(t, os.MkdirAll(workDir+"/dirB", 0o755))
+
+		root, err := resolveExtractRoot(workDir, "dirB")
+		require.NoError(t, err)
+		assert.Equal(t, workDir+"/dirB", root)
+	})
+
+	t.Run("override missing directory errors", func(t *testing.T) {
+		workDir := t.TempDir()
+
+		_, err := resolveExtractRoot(workDir, "does-not-exist")
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "not found")
+	})
+
+	t.Run("override pointing at a file errors", func(t *testing.T) {
+		workDir := t.TempDir()
+		require.NoError(t, os.WriteFile(workDir+"/afile", nil, fileperms.PrivateFile))
+
+		_, err := resolveExtractRoot(workDir, "afile")
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "not a directory")
+	})
+
+	t.Run("non-local override is rejected", func(t *testing.T) {
+		workDir := t.TempDir()
+
+		_, err := resolveExtractRoot(workDir, "../escape")
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "not a local path")
+	})
+
+	t.Run("override that is a symlink escaping workDir is rejected", func(t *testing.T) {
+		workDir := t.TempDir()
+		outsideDir := t.TempDir()
+		require.NoError(t, os.Symlink(outsideDir, workDir+"/link"))
+
+		_, err := resolveExtractRoot(workDir, "link")
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "escapes the extraction directory")
+	})
+
+	t.Run("override that is a symlink staying inside workDir is accepted", func(t *testing.T) {
+		workDir := t.TempDir()
+		require.NoError(t, os.MkdirAll(workDir+"/real", 0o755))
+		require.NoError(t, os.Symlink("real", workDir+"/alias"))
+
+		root, err := resolveExtractRoot(workDir, "alias")
+		require.NoError(t, err)
+		assert.Equal(t, workDir+"/alias", root)
+	})
+}
+
+// TestArchiveFileRemove verifies that archive-scoped file-remove overlays are
+// routed through the shared [applyNonSpecOverlay] machinery against the
+// extract-root FS (i.e., the same code path that [processArchive] uses).
+func TestArchiveFileRemove(t *testing.T) {
+	ctx := testctx.NewCtx()
+
+	t.Run("deletes matching files in the extracted tree", func(t *testing.T) {
+		extractRoot := t.TempDir()
+		require.NoError(t, os.WriteFile(extractRoot+"/keep.txt", []byte("keep"), fileperms.PrivateFile))
+		require.NoError(t, os.WriteFile(extractRoot+"/remove.conf", []byte("x"), fileperms.PrivateFile))
+
+		extractFS, err := rootfs.New(extractRoot)
+		require.NoError(t, err)
+
+		t.Cleanup(func() { assert.NoError(t, extractFS.Close()) })
+
+		overlay := projectconfig.ComponentOverlay{
+			Type:     projectconfig.ComponentOverlayRemoveFile,
+			Archive:  "pkg.tar.gz",
+			Filename: "*.conf",
+		}
+
+		err = applyNonSpecOverlay(ctx, extractFS, extractFS, overlay)
+		require.NoError(t, err)
+
+		assert.FileExists(t, extractRoot+"/keep.txt")
+		assert.NoFileExists(t, extractRoot+"/remove.conf")
+	})
+
+	t.Run("with no match errors like a loose-file overlay", func(t *testing.T) {
+		extractRoot := t.TempDir()
+		require.NoError(t, os.WriteFile(extractRoot+"/file.txt", nil, fileperms.PrivateFile))
+
+		extractFS, err := rootfs.New(extractRoot)
+		require.NoError(t, err)
+
+		t.Cleanup(func() { assert.NoError(t, extractFS.Close()) })
+
+		overlay := projectconfig.ComponentOverlay{
+			Type:     projectconfig.ComponentOverlayRemoveFile,
+			Archive:  "pkg.tar.gz",
+			Filename: "*.conf",
+		}
+
+		err = applyNonSpecOverlay(ctx, extractFS, extractFS, overlay)
+		require.Error(t, err)
+		assert.Contains(t, err.Error(), "did not match any files")
+	})
+}
diff --git a/internal/app/azldev/core/sources/sourceprep.go b/internal/app/azldev/core/sources/sourceprep.go
index 617dcbd8..46781805 100644
--- a/internal/app/azldev/core/sources/sourceprep.go
+++ b/internal/app/azldev/core/sources/sourceprep.go
@@ -246,8 +246,7 @@ func (p *sourcePreparerImpl) PrepareSources(
 	}
 
 	if applyOverlays {
-		err := p.applyOverlaysToSources(ctx, component, outputDir)
-		if err != nil {
+		if err := p.applyOverlaysToSources(component, outputDir); err != nil {
 			return err
 		}
 
@@ -274,11 +273,8 @@ func (p *sourcePreparerImpl) PrepareSources(
 
 //
applyOverlaysToSources writes the macros file and then applies all overlays. func (p *sourcePreparerImpl) applyOverlaysToSources( - ctx context.Context, component components.Component, outputDir string, + component components.Component, outputDir string, ) error { - // Emit computed macros to a macros file in the output directory. - // If the build configuration produces no macros, no file is written and - // macrosFileName will be empty. var macrosFileName string macrosFilePath, err := p.writeMacrosFile(component, outputDir) @@ -291,32 +287,27 @@ func (p *sourcePreparerImpl) applyOverlaysToSources( macrosFileName = filepath.Base(macrosFilePath) } - // Apply all overlays to prepared sources. - if err := p.applyOverlays(ctx, component, outputDir, macrosFileName); err != nil { - return fmt.Errorf("failed to apply overlays for component %#q:\n%w", component.GetName(), err) + if err := p.applyOverlays(component, outputDir, macrosFileName); err != nil { + return fmt.Errorf("failed to apply overlays for component %#q:\n%w", + component.GetName(), err) } return nil } // applyOverlays applies all overlays (user-defined and system-generated) to the -// component sources. Overlay application is decoupled from git history generation: -// overlays modify the working tree; synthetic history is recorded separately by -// [trySyntheticHistory]. +// component sources. func (p *sourcePreparerImpl) applyOverlays( - _ context.Context, component components.Component, sourcesDirPath, macrosFileName string, + component components.Component, sourcesDirPath, macrosFileName string, ) error { event := p.eventListener.StartEvent("Applying overlays", "component", component.GetName()) defer event.End() - // Resolve the spec path once for all overlay operations in this call. absSpecPath, err := p.resolveSpecPath(component, sourcesDirPath) if err != nil { return err } - // Collect all overlays in application order. 
This ensures every change is - // captured in the synthetic history, including build configuration changes. allOverlays, err := p.collectOverlays(component, macrosFileName) if err != nil { return fmt.Errorf("failed to collect overlays for component %#q:\n%w", component.GetName(), err) @@ -326,7 +317,13 @@ func (p *sourcePreparerImpl) applyOverlays( return nil } - // Apply all overlays to the working tree. + // Archive overlays are applied first (they modify archived source files + // in-place), followed by spec and loose-file overlays. Each function + // self-filters to the overlay types it handles. + if err := p.applyArchiveOverlayGroup(component, sourcesDirPath, allOverlays); err != nil { + return err + } + if err := p.applyOverlayList(allOverlays, sourcesDirPath, absSpecPath); err != nil { return fmt.Errorf("failed to apply overlays for component %#q:\n%w", component.GetName(), err) } @@ -334,6 +331,40 @@ func (p *sourcePreparerImpl) applyOverlays( return nil } +// applyArchiveOverlayGroup applies the archive-scoped overlays contained in the +// given overlay list. The list may hold overlays of any type; only those for +// which [projectconfig.ComponentOverlay.ModifiesArchive] reports true are +// processed here. Skipped when source downloads were not performed. 
+func (p *sourcePreparerImpl) applyArchiveOverlayGroup(
+	component components.Component,
+	sourcesDirPath string, overlays []projectconfig.ComponentOverlay,
+) error {
+	// Keep only the overlays that are scoped to a source archive.
+	scoped := lo.Filter(overlays, func(candidate projectconfig.ComponentOverlay, _ int) bool {
+		return candidate.ModifiesArchive()
+	})
+
+	switch {
+	case len(scoped) == 0:
+		return nil
+	case p.skipLookaside:
+		// Source downloads were skipped, so the archive overlays are skipped too.
+		slog.Warn("Skipping archive overlays because source downloads were skipped (--skip-sources)",
+			"component", component.GetName(),
+			"count", len(scoped))
+
+		return nil
+	}
+
+	err := applyArchiveOverlays(p.dryRunnable, p.eventListener, sourcesDirPath, scoped)
+	if err != nil {
+		return fmt.Errorf("failed to apply archive overlays for component %#q:\n%w",
+			component.GetName(), err)
+	}
+
+	return nil
+}
+
 // collectOverlays gathers all overlays for a component into a single ordered slice:
 // macros-load first, then user overlays, followed by check-skip and file-header overlays.
 func (p *sourcePreparerImpl) collectOverlays(
@@ -605,7 +636,7 @@ func (p *sourcePreparerImpl) DiffSources(
 	}
 
 	// Apply overlays in-place to the copied directory only.
-	if err := p.applyOverlaysToSources(ctx, component, overlaidDir); err != nil {
+	if err := p.applyOverlaysToSources(component, overlaidDir); err != nil {
 		return nil, fmt.Errorf("failed to apply overlays for component %#q:\n%w", component.GetName(), err)
 	}
 
@@ -634,9 +665,23 @@ func (p *sourcePreparerImpl) DiffSources(
 // enforced by [projectconfig.ConfigFile.Validate]). Setting `ReplaceUpstream` = true without
 // a matching upstream entry is also an error: the user expressed intent to replace something
 // that isn't there, which almost certainly indicates a stale config or filename typo.
-func (p *sourcePreparerImpl) updateSourcesFile(component components.Component, outputDir string) error { - sourceFiles := component.GetConfig().SourceFiles - if len(sourceFiles) == 0 { +func (p *sourcePreparerImpl) updateSourcesFile( + component components.Component, outputDir string, +) error { + config := component.GetConfig() + sourceFiles := config.SourceFiles + + // Derive the archives whose 'sources' hash needs refreshing because an archive + // overlay repacked them. Only meaningful when archive overlays actually ran: + // when skipLookaside is set, archive overlays are skipped (see + // [applyArchiveOverlayGroup]) and the archives may not even be present on disk, + // so rehashing would either fail or pointlessly rewrite an unchanged hash. + var modifiedArchives []string + if !p.skipLookaside { + modifiedArchives = archiveNamesFromOverlays(config.Overlays) + } + + if len(sourceFiles) == 0 && len(modifiedArchives) == 0 { return nil } @@ -647,7 +692,19 @@ func (p *sourcePreparerImpl) updateSourcesFile(component components.Component, o return err } - mergedLines, err := p.buildSourceEntries(sourceFiles, existingContent, component.GetName(), outputDir) + // Parse once, then rehash modified archives and merge source-files entries + // on the parsed representation — single parse, single write. + existingLines, err := fedorasource.ReadSourcesFile(existingContent) + if err != nil { + return fmt.Errorf("failed to parse 'sources' file %#q:\n%w", sourcesFilePath, err) + } + + // Rehash archives that were modified by archive overlays in-place. 
+	if err := p.rehashModifiedEntries(existingLines, outputDir, modifiedArchives); err != nil {
+		return err
+	}
+
+	mergedLines, err := p.buildSourceEntries(sourceFiles, existingLines, component.GetName(), outputDir)
 	if err != nil {
 		return err
 	}
@@ -666,6 +723,47 @@ func (p *sourcePreparerImpl) updateSourcesFile(component components.Component, o
 	return nil
 }
 
+// rehashModifiedEntries refreshes, in place, the parsed 'sources' entries that
+// belong to archives repacked by archive overlays: both the entry's hash and its
+// raw line are rewritten, keeping the hash type the entry already declared.
+func (p *sourcePreparerImpl) rehashModifiedEntries(
+	lines []fedorasource.SourcesFileLine, outputDir string, modifiedArchives []string,
+) error {
+	if len(modifiedArchives) == 0 {
+		return nil
+	}
+
+	repacked := make(map[string]bool, len(modifiedArchives))
+	for _, archiveName := range modifiedArchives {
+		repacked[archiveName] = true
+	}
+
+	for idx := range lines {
+		entry := lines[idx].Entry
+		if entry == nil || !repacked[entry.Filename] {
+			continue
+		}
+
+		freshHash, err := fileutils.ComputeFileHash(
+			p.fs, entry.HashType, filepath.Join(outputDir, entry.Filename))
+		if err != nil {
+			return fmt.Errorf("rehashing modified archive %#q:\n%w", entry.Filename, err)
+		}
+
+		slog.Debug("Rehashed modified archive in 'sources' file",
+			"archive", entry.Filename,
+			"hashType", entry.HashType,
+			"oldHash", entry.Hash,
+			"newHash", freshHash,
+		)
+
+		lines[idx].Raw = fedorasource.FormatSourcesEntry(entry.Filename, entry.HashType, freshHash)
+		entry.Hash = freshHash
+	}
+
+	return nil
+}
+
 // readSourcesFileIfExists reads the 'sources' file content if it exists, returning empty string if not.
func (p *sourcePreparerImpl) readSourcesFileIfExists(sourcesFilePath string) (string, error) { exists, err := fileutils.Exists(p.fs, sourcesFilePath) @@ -685,32 +783,24 @@ func (p *sourcePreparerImpl) readSourcesFileIfExists(sourcesFilePath string) (st return string(data), nil } -// buildSourceEntries validates [projectconfig.SourceFileReference] entries and returns -// the merged set of lines ready to be written to the 'sources' file. Before returning, -// it logs an INFO-level event indicating that the 'sources' file will be updated, -// including the counts of newly added and replaced entries. +// buildSourceEntries merges user-declared [projectconfig.SourceFileReference] entries +// into the parsed 'sources' lines. Returns the final set of raw lines ready to be +// written to the 'sources' file. // // Output ordering and preservation: -// - Each line of [existingContent] is emitted verbatim, except for entry lines whose +// - Each existing line is emitted verbatim, except for entry lines whose // filename matches a replacement, which are swapped for the new formatted entry. -// Comments and blank lines from the original file are kept in their original positions. -// - Brand-new entries (no upstream filename collision) are appended after the upstream -// content in the order they appear in [sourceFiles]. +// Comments and blank lines are kept in their original positions. +// - Brand-new entries (no upstream filename collision) are appended after the +// existing content in the order they appear in [sourceFiles]. // // Collision rules and hash resolution are documented on [sourcePreparerImpl.processSourceRef]. 
func (p *sourcePreparerImpl) buildSourceEntries( sourceFiles []projectconfig.SourceFileReference, - existingContent string, + existingLines []fedorasource.SourcesFileLine, componentName string, outputDir string, ) (mergedLines []string, err error) { - existingLines, err := fedorasource.ReadSourcesFile(existingContent) - if err != nil { - return nil, fmt.Errorf( - "failed to parse existing 'sources' file at %#q:\n%w", - filepath.Join(outputDir, fedorasource.SourcesFileName), err) - } - // Index upstream entries by filename for O(1) collision lookup. The parser // (fedorasource.ReadSourcesFile) errors on duplicate filenames, so the // entries are guaranteed unique by the time we get here. @@ -1085,10 +1175,17 @@ func (p *sourcePreparerImpl) resolveSpecPath( } // applyOverlayList applies a list of overlays to the component sources sequentially. +// Archive-scoped overlays (see [projectconfig.ComponentOverlay.ModifiesArchive]) are +// skipped here; they are handled separately by [applyArchiveOverlays], which batches +// extraction and repacking per archive. 
func (p *sourcePreparerImpl) applyOverlayList( overlays []projectconfig.ComponentOverlay, sourcesDirPath, absSpecPath string, ) error { for _, overlay := range overlays { + if overlay.ModifiesArchive() { + continue + } + if err := ApplyOverlayToSources( p.dryRunnable, p.fs, overlay, sourcesDirPath, absSpecPath, ); err != nil { diff --git a/internal/app/azldev/core/sources/sourceprep_test.go b/internal/app/azldev/core/sources/sourceprep_test.go index 45a9d286..704bea69 100644 --- a/internal/app/azldev/core/sources/sourceprep_test.go +++ b/internal/app/azldev/core/sources/sourceprep_test.go @@ -5,6 +5,7 @@ package sources_test import ( "errors" + "os" "path/filepath" "strings" "testing" @@ -16,6 +17,7 @@ import ( "github.com/microsoft/azure-linux-dev-tools/internal/providers/sourceproviders" "github.com/microsoft/azure-linux-dev-tools/internal/providers/sourceproviders/fedorasource" "github.com/microsoft/azure-linux-dev-tools/internal/providers/sourceproviders/sourceproviders_test" + "github.com/microsoft/azure-linux-dev-tools/internal/utils/archive" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileperms" "github.com/microsoft/azure-linux-dev-tools/internal/utils/fileutils" "github.com/stretchr/testify/assert" @@ -99,6 +101,115 @@ func TestPrepareSources_Success(t *testing.T) { assert.NotContains(t, string(specContents), "Source9999") } +// TestPrepareSources_ArchiveOverlayRehashesSourcesEntry is an end-to-end check +// of the key correctness behavior introduced with archive overlays: when an +// archive-scoped overlay mutates an archive's contents, the matching 'sources' +// entry must be re-hashed in place so the recorded digest reflects the repacked +// archive (while keeping the original hash *type*). 
+// +// This runs against the host filesystem with a real temp dir because archive +// overlays extract/repack through the [archive] package, which uses OS +// primitives ([os.Root], os.*) and therefore requires genuine on-disk paths — +// an in-memory FS would not be visible to extraction/repacking. This mirrors +// the existing archive internal tests, which likewise use t.TempDir(). +func TestPrepareSources_ArchiveOverlayRehashesSourcesEntry(t *testing.T) { + const ( + componentName = "test-component" + archiveName = "pkg-1.0.tar.gz" + ) + + // Host FS + real temp dir: archive extraction/repacking happens on disk. + ctx := testctx.NewCtx(testctx.WithHostFS()) + outputDir := t.TempDir() + + archivePath := filepath.Join(outputDir, archiveName) + specPath := filepath.Join(outputDir, componentName+".spec") + sourcesPath := filepath.Join(outputDir, fedorasource.SourcesFileName) + + // Build a deterministic archive whose single top-level directory follows the + // conventional "%{name}-%{version}/" layout, containing a file we will remove + // and one we will keep. + stagingDir := t.TempDir() + pkgRoot := filepath.Join(stagingDir, "pkg-1.0") + require.NoError(t, os.MkdirAll(pkgRoot, fileperms.PublicDir)) + require.NoError(t, os.WriteFile(filepath.Join(pkgRoot, "keep.txt"), []byte("keep me"), fileperms.PrivateFile)) + require.NoError(t, os.WriteFile(filepath.Join(pkgRoot, "remove-me.txt"), []byte("delete me"), fileperms.PrivateFile)) + require.NoError(t, archive.CreateDeterministicArchiveAuto(archivePath, stagingDir)) + + // Record the original hash of the archive and seed a 'sources' file with it. + // Use SHA256 (not the SHA512 default) so the test also proves the hash *type* + // is preserved rather than coincidentally matching a default. 
+ originalHash, err := fileutils.ComputeFileHash(ctx.FS(), fileutils.HashTypeSHA256, archivePath) + require.NoError(t, err) + + originalEntry := fedorasource.FormatSourcesEntry(archiveName, fileutils.HashTypeSHA256, originalHash) + require.NoError(t, fileutils.WriteFile( + ctx.FS(), sourcesPath, []byte(originalEntry+"\n"), fileperms.PublicFile)) + + ctrl := gomock.NewController(t) + component := components_testutils.NewMockComponent(ctrl) + sourceManager := sourceproviders_test.NewMockSourceManager(ctrl) + + component.EXPECT().GetName().AnyTimes().Return(componentName) + component.EXPECT().GetConfig().AnyTimes().Return(&projectconfig.ComponentConfig{ + Overlays: []projectconfig.ComponentOverlay{ + { + Type: projectconfig.ComponentOverlayRemoveFile, + Archive: archiveName, + Filename: "remove-me.txt", + }, + }, + }) + + // The archive and 'sources' file already exist on disk; the source manager + // only needs to provide the spec file (FetchFiles is a no-op download). + sourceManager.EXPECT().FetchFiles(gomock.Any(), component, outputDir).Return(nil) + sourceManager.EXPECT().FetchComponent(gomock.Any(), component, outputDir, gomock.Any()).DoAndReturn( + func(_ interface{}, _ interface{}, dir string, _ ...sourceproviders.FetchComponentOption) error { + return fileutils.WriteFile( + ctx.FS(), filepath.Join(dir, componentName+".spec"), + []byte("# test spec"), fileperms.PublicFile) + }, + ) + + preparer, err := sources.NewPreparer(sourceManager, ctx.FS(), ctx, ctx) + require.NoError(t, err) + + err = preparer.PrepareSources(ctx, component, outputDir, true /*applyOverlays*/) + require.NoError(t, err) + + // The overlay must have actually mutated the archive on disk. 
+ assert.FileExists(t, specPath) + + repackedHash, err := fileutils.ComputeFileHash(ctx.FS(), fileutils.HashTypeSHA256, archivePath) + require.NoError(t, err) + require.NotEqual(t, originalHash, repackedHash, + "precondition: removing a file from the archive should change its hash") + + // The 'sources' entry must have been rewritten to the repacked archive's hash, + // preserving the original SHA256 hash type. + sourcesContent, err := fileutils.ReadFile(ctx.FS(), sourcesPath) + require.NoError(t, err) + + parsedLines, err := fedorasource.ReadSourcesFile(string(sourcesContent)) + require.NoError(t, err) + + var entry *fedorasource.SourcesFileEntry + + for i := range parsedLines { + if parsedLines[i].Entry != nil && parsedLines[i].Entry.Filename == archiveName { + entry = parsedLines[i].Entry + + break + } + } + + require.NotNil(t, entry, "rewritten 'sources' file should still contain an entry for %q", archiveName) + assert.Equal(t, fileutils.HashTypeSHA256, entry.HashType, "original hash type must be preserved") + assert.Equal(t, repackedHash, entry.Hash, "'sources' entry must record the repacked archive's hash") + assert.NotEqual(t, originalHash, entry.Hash, "'sources' entry hash must have been updated") +} + func TestPrepareSources_SourceManagerError(t *testing.T) { ctrl := gomock.NewController(t) component := components_testutils.NewMockComponent(ctrl) @@ -854,7 +965,7 @@ func TestPrepareSources_UpdatesSourcesFile(t *testing.T) { existingSourcesContent: "SHA512 (dup.tar.gz) = aaaa1111\nSHA512 (dup.tar.gz) = bbbb2222\n", expectError: true, errorContains: []string{ - "failed to parse existing 'sources' file", + "failed to parse 'sources' file", "duplicate filename", "dup.tar.gz", }, diff --git a/internal/fingerprint/fingerprint_test.go b/internal/fingerprint/fingerprint_test.go index 4a6cf1eb..2248cd0c 100644 --- a/internal/fingerprint/fingerprint_test.go +++ b/internal/fingerprint/fingerprint_test.go @@ -251,6 +251,40 @@ func 
TestComputeIdentity_OverlaySourceFileChange(t *testing.T) { assert.NotEqual(t, fp1, fp2, "different overlay source content must produce different fingerprints") } +func TestComputeIdentity_OverlayArchiveScopingChangesFP(t *testing.T) { + // Archive-scoping fields are part of the hashed config, so setting them must + // change the fingerprint. Guards against excluding them (e.g. `fingerprint:"-"`), + // which would let an archive overlay skip a rebuild. + ctx := newTestFS(t, map[string]string{ + "/specs/test.spec": "Name: testpkg\nVersion: 1.0", + }) + releaseVer := testReleaseVer + + base := baseComponent() + base.Overlays = []projectconfig.ComponentOverlay{ + {Type: "file-remove", Filename: "bundled.conf"}, + } + fpBase := computeFingerprint(t, ctx, base, releaseVer, 0) + + withArchive := baseComponent() + withArchive.Overlays = []projectconfig.ComponentOverlay{ + {Type: "file-remove", Filename: "bundled.conf", Archive: "pkg-1.0.tar.gz"}, + } + fpArchive := computeFingerprint(t, ctx, withArchive, releaseVer, 0) + + assert.NotEqual(t, fpBase, fpArchive, + "setting the archive scope must change the fingerprint") + + withRoot := baseComponent() + withRoot.Overlays = []projectconfig.ComponentOverlay{ + {Type: "file-remove", Filename: "bundled.conf", Archive: "pkg-1.0.tar.gz", ArchiveRoot: "custom-root"}, + } + fpRoot := computeFingerprint(t, ctx, withRoot, releaseVer, 0) + + assert.NotEqual(t, fpArchive, fpRoot, + "setting the archive-root override must change the fingerprint") +} + func TestComputeIdentity_PatchAddRenameChangesFP(t *testing.T) { // When patch-add omits 'file', the destination filename is derived from // filepath.Base(Source). 
Renaming the source file changes the rendered diff --git a/internal/projectconfig/fingerprint_test.go b/internal/projectconfig/fingerprint_test.go index 3bd14eff..214bac56 100644 --- a/internal/projectconfig/fingerprint_test.go +++ b/internal/projectconfig/fingerprint_test.go @@ -5,6 +5,7 @@ package projectconfig_test import ( "reflect" + "strings" "testing" "github.com/microsoft/azure-linux-dev-tools/internal/projectconfig" @@ -104,15 +105,29 @@ func TestAllFingerprintedFieldsHaveDecision(t *testing.T) { tag := field.Tag.Get("fingerprint") - switch tag { + // hashstructure tags are `name,option,...`; the name part decides + // inclusion ("-" excludes, anything else includes) and the options + // tune how an included field is hashed. + name, options, _ := strings.Cut(tag, ",") + + switch name { case "": - // No tag — included by default (the safe default). + // Empty name — included by default (the safe default). The only + // option we permit is `omitempty`, which makes hashstructure skip + // the field when it holds its zero value (so an unset field never + // perturbs the hash) while still hashing it when set. Reject any + // other option as a likely typo. + if options != "" && options != "omitempty" { + assert.Failf(t, "invalid fingerprint tag", + "field %q has unrecognised fingerprint tag option %q — "+ + "only `omitempty` is supported on included fields", key, options) + } case "-": actualExclusions[key] = true default: - // hashstructure only recognises "" (include) and "-" (exclude). - // Any other value is silently treated as included, which is - // almost certainly a typo. + // hashstructure only recognises "" (include) and "-" (exclude) + // for the name part. Any other value is silently treated as + // included, which is almost certainly a typo. 
assert.Failf(t, "invalid fingerprint tag", "field %q has unrecognised fingerprint tag value %q — "+ "only `fingerprint:\"-\"` (exclude) is valid; "+ diff --git a/internal/projectconfig/overlay.go b/internal/projectconfig/overlay.go index 7ec0b50e..4e52380e 100644 --- a/internal/projectconfig/overlay.go +++ b/internal/projectconfig/overlay.go @@ -21,6 +21,13 @@ type ComponentOverlay struct { // Human readable description of overlay; primarily present to document the need for the change. Description string `toml:"description,omitempty" json:"description,omitempty" jsonschema:"title=Description,description=Human readable description of overlay" fingerprint:"-"` + // Scopes the overlay to files inside this source archive (a bare filename, not a path). + // Only file-remove and file-search-replace honor it; when set, the overlay operates inside + // the named archive instead of the loose sources tree. + Archive string `toml:"archive,omitempty" json:"archive,omitempty" jsonschema:"title=Archive,description=The source archive to modify (e.g. pkg-1.0.tar.gz)"` + // Overrides the archive's extraction root (rpmbuild's `%setup -n` equivalent). When unset, the + // root is inferred: a single top-level directory is used, otherwise the archive root. + ArchiveRoot string `toml:"archive-root,omitempty" json:"archiveRoot,omitempty" jsonschema:"title=Archive root,description=Top-level directory inside the archive to treat as the extraction root (mirrors %setup -n); inferred when unset"` // For overlays that apply to non-spec files, indicates the filename. For overlays that can // apply to multiple files, supports glob patterns (including globstar). 
Filename string `toml:"file,omitempty" json:"file,omitempty" jsonschema:"title=Filename,description=The name of the non-spec file to which this overlay applies, or a glob pattern matching multiple files"` @@ -119,10 +126,24 @@ func (c *ComponentOverlay) ModifiesSpec() bool { c.Type == ComponentOverlayRemovePatch } +// ModifiesArchive returns true if the overlay modifies files inside a source archive. +// These overlays require extraction and repacking of the archive. Only file-remove and +// file-search-replace support archive scoping, and only when their +// [ComponentOverlay.Archive] field is set. +func (c *ComponentOverlay) ModifiesArchive() bool { + return c.Archive != "" && + (c.Type == ComponentOverlayRemoveFile || c.Type == ComponentOverlaySearchAndReplaceInFile) +} + // ModifiesNonSpecFiles returns true if the overlay modifies non-spec files. This includes // hybrid overlays that modify both spec and source files (e.g., patch overlays), since -// those also require non-spec modifications. +// those also require non-spec modifications. Archive-scoped overlays (see [ComponentOverlay.ModifiesArchive]) +// are excluded: they operate on files inside an archive, not loose files in the sources tree. func (c *ComponentOverlay) ModifiesNonSpecFiles() bool { + if c.ModifiesArchive() { + return false + } + return c.Type == ComponentOverlayPrependLinesToFile || c.Type == ComponentOverlaySearchAndReplaceInFile || c.Type == ComponentOverlayAddFile || @@ -178,7 +199,9 @@ const ( ComponentOverlaySearchAndReplaceInFile ComponentOverlayType = "file-search-replace" // ComponentOverlayAddFile is an overlay that adds a non-spec file. ComponentOverlayAddFile ComponentOverlayType = "file-add" - // ComponentOverlayRemoveFile is an overlay that removes a non-spec file. + // ComponentOverlayRemoveFile is an overlay that removes a non-spec file. When its + // [ComponentOverlay.Archive] field is set, it removes file(s) from inside that source + // archive instead of loose files in the sources tree. 
ComponentOverlayRemoveFile ComponentOverlayType = "file-remove" // ComponentOverlayRenameFile is an overlay that renames a non-spec file. ComponentOverlayRenameFile ComponentOverlayType = "file-rename" @@ -187,7 +210,7 @@ const ( // Validate checks that required fields are set based on the overlay type. This catches // configuration errors at load time rather than at apply time. // -//nolint:cyclop,gocognit,gocyclo,funlen // complexity is inherent to the number of overlay types. +//nolint:cyclop,gocognit,gocyclo,funlen,maintidx // complexity is inherent to the number of overlay types. func (c *ComponentOverlay) Validate() error { desc := c.Description if desc == "" { @@ -232,6 +255,33 @@ func (c *ComponentOverlay) Validate() error { return nil } + // The archive field scopes an overlay to operate inside a source archive. It is only + // accepted on file-remove and file-search-replace, and must be a bare filename (not a path). + if c.Archive != "" { + if c.Type != ComponentOverlayRemoveFile && c.Type != ComponentOverlaySearchAndReplaceInFile { + return unexpectedField("archive") + } + + if err := requireFileBasename("archive", c.Archive); err != nil { + return err + } + } + + // The archive-root override is only meaningful for archive-scoped overlays, and must be a + // local relative path so it cannot escape the extraction directory. 
+ if c.ArchiveRoot != "" { + if !c.ModifiesArchive() { + return unexpectedField("archive-root") + } + + if !filepath.IsLocal(c.ArchiveRoot) { + return fmt.Errorf( + "overlay type %#q requires %#q to be a local relative path (no %#q or absolute paths); found %#q", + c.Type, "archive-root", "..", c.ArchiveRoot, + ) + } + } + switch c.Type { case ComponentOverlayAddSpecTag, ComponentOverlayInsertSpecTag, ComponentOverlaySetSpecTag, ComponentOverlayUpdateSpecTag: diff --git a/internal/projectconfig/overlay_test.go b/internal/projectconfig/overlay_test.go index 84ccf200..63734bc1 100644 --- a/internal/projectconfig/overlay_test.go +++ b/internal/projectconfig/overlay_test.go @@ -412,6 +412,117 @@ func TestComponentOverlay_Validate(t *testing.T) { errorExpected: true, errorContains: "section", }, + // archive-scoped file-remove tests (archive modifier) + { + name: "file-remove with archive valid (archive-scoped)", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayRemoveFile, + Archive: "pkg-1.0.tar.gz", + Filename: "unwanted.conf", + }, + errorExpected: false, + }, + { + name: "file-remove with archive glob valid", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayRemoveFile, + Archive: "pkg-1.0.tar.gz", + Filename: "docs/**/*.md", + }, + errorExpected: false, + }, + { + name: "file-remove without archive is a plain loose-file remove", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayRemoveFile, + Filename: "unwanted.conf", + }, + errorExpected: false, + }, + { + name: "file-remove with archive missing file", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayRemoveFile, + Archive: "pkg-1.0.tar.gz", + }, + errorExpected: true, + errorContains: "file", + }, + { + name: "file-remove rejects archive path", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayRemoveFile, + Archive: "subdir/pkg-1.0.tar.gz", + Filename: 
"unwanted.conf", + }, + errorExpected: true, + errorContains: "archive", + }, + { + name: "archive rejected on overlay type that cannot be archive-scoped", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayAddFile, + Archive: "pkg-1.0.tar.gz", + Filename: "new.conf", + Source: "files/new.conf", + }, + errorExpected: true, + errorContains: "archive", + }, + { + name: "file-remove with archive-root override valid", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayRemoveFile, + Archive: "pkg-1.0.tar.gz", + ArchiveRoot: "src-root", + Filename: "unwanted.conf", + }, + errorExpected: false, + }, + { + name: "file-remove rejects non-local archive-root", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayRemoveFile, + Archive: "pkg-1.0.tar.gz", + ArchiveRoot: "../escape", + Filename: "unwanted.conf", + }, + errorExpected: true, + errorContains: "archive-root", + }, + { + name: "archive-root rejected when archive unset", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlayRemoveFile, + ArchiveRoot: "src-root", + Filename: "unwanted.conf", + }, + errorExpected: true, + errorContains: "archive-root", + }, + { + name: "archive-root rejected on non-archive overlay", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlaySetSpecTag, + Tag: "Version", + Value: "1.0.0", + ArchiveRoot: "src-root", + }, + errorExpected: true, + errorContains: "archive-root", + }, + // file-search-replace supports archive scoping + { + name: "file-search-replace with archive valid (archive-scoped)", + overlay: projectconfig.ComponentOverlay{ + Type: projectconfig.ComponentOverlaySearchAndReplaceInFile, + Archive: "pkg-1.0.tar.gz", + Filename: "config.h", + Regex: "old_value", + Replacement: "new_value", + }, + errorExpected: false, + }, } for _, testCase := range testCases { @@ -455,6 +566,16 @@ func TestComponentOverlay_ModifiesSpec(t *testing.T) { 
projectconfig.ComponentOverlayAddFile, } + // Archive-scoped overlays: only file-remove/file-search-replace becomes archive-scoped, + // and only when its Archive field is set. + archiveOverlays := []projectconfig.ComponentOverlay{ + {Type: projectconfig.ComponentOverlayRemoveFile, Archive: "pkg-1.0.tar.gz", Filename: "f"}, + { + Type: projectconfig.ComponentOverlaySearchAndReplaceInFile, + Archive: "pkg-1.0.tar.gz", Filename: "f", Regex: "old", Replacement: "new", + }, + } + for _, overlayType := range specOverlayTypes { t.Run(string(overlayType)+"_is_spec_overlay", func(t *testing.T) { overlay := projectconfig.ComponentOverlay{Type: overlayType} @@ -468,4 +589,13 @@ func TestComponentOverlay_ModifiesSpec(t *testing.T) { assert.False(t, overlay.ModifiesSpec(), "expected %s to not be a spec overlay", overlayType) }) } + + for _, overlay := range archiveOverlays { + t.Run(string(overlay.Type)+"_is_archive_scoped", func(t *testing.T) { + assert.True(t, overlay.ModifiesArchive(), "expected %s to be an archive-scoped overlay", overlay.Type) + assert.False(t, overlay.ModifiesSpec(), "expected %s to not be a spec overlay", overlay.Type) + assert.False(t, overlay.ModifiesNonSpecFiles(), + "expected %s to not be a loose non-spec overlay", overlay.Type) + }) + } } diff --git a/internal/utils/archive/archive.go b/internal/utils/archive/archive.go index 65be9a18..a50dff61 100644 --- a/internal/utils/archive/archive.go +++ b/internal/utils/archive/archive.go @@ -79,6 +79,20 @@ func DetectCompression(filename string) (Compression, error) { } } +// ExtractAuto is a convenience wrapper that infers the compression from +// archivePath's extension via [DetectCompression] and then calls [Extract]. +// Most callers should prefer this over the explicit-compression [Extract], +// which exists for cases where the compression cannot be derived from the +// filename. 
+func ExtractAuto(archivePath, destDir string) error { + comp, err := DetectCompression(archivePath) + if err != nil { + return fmt.Errorf("detecting compression for %#q:\n%w", archivePath, err) + } + + return Extract(archivePath, destDir, comp) +} + // Extract reads a tar archive, decompresses it, and extracts all entries into // destDir. Supported entry types are regular files, directories, and symlinks; // other entry types are skipped. Entry paths are confined to destDir via @@ -174,6 +188,19 @@ func (r readerCloser) Close() error { return nil } +// CreateDeterministicArchiveAuto is a convenience wrapper that infers the +// compression from archivePath's extension via [DetectCompression] and then +// calls [CreateDeterministicArchive]. Most callers should prefer this over the +// explicit-compression [CreateDeterministicArchive]. +func CreateDeterministicArchiveAuto(archivePath, sourceDir string) error { + comp, err := DetectCompression(archivePath) + if err != nil { + return fmt.Errorf("detecting compression for %#q:\n%w", archivePath, err) + } + + return CreateDeterministicArchive(archivePath, sourceDir, comp) +} + // CreateDeterministicArchive creates a new tar archive from the contents of sourceDir // and writes it to archivePath on the OS filesystem, replacing any existing file. // diff --git a/scenario/__snapshots__/TestSnapshotsContainer_config_generate-schema_stdout_1.snap b/scenario/__snapshots__/TestSnapshotsContainer_config_generate-schema_stdout_1.snap index 7aa38d7e..9aa5a853 100755 --- a/scenario/__snapshots__/TestSnapshotsContainer_config_generate-schema_stdout_1.snap +++ b/scenario/__snapshots__/TestSnapshotsContainer_config_generate-schema_stdout_1.snap @@ -231,6 +231,16 @@ "title": "Description", "description": "Human readable description of overlay" }, + "archive": { + "type": "string", + "title": "Archive", + "description": "The source archive to modify (e.g. 
pkg-1.0.tar.gz)" + }, + "archive-root": { + "type": "string", + "title": "Archive root", + "description": "Top-level directory inside the archive to treat as the extraction root (mirrors %setup -n); inferred when unset" + }, "file": { "type": "string", "title": "Filename", diff --git a/scenario/__snapshots__/TestSnapshots_config_generate-schema_stdout_1.snap b/scenario/__snapshots__/TestSnapshots_config_generate-schema_stdout_1.snap index 7aa38d7e..9aa5a853 100755 --- a/scenario/__snapshots__/TestSnapshots_config_generate-schema_stdout_1.snap +++ b/scenario/__snapshots__/TestSnapshots_config_generate-schema_stdout_1.snap @@ -231,6 +231,16 @@ "title": "Description", "description": "Human readable description of overlay" }, + "archive": { + "type": "string", + "title": "Archive", + "description": "The source archive to modify (e.g. pkg-1.0.tar.gz)" + }, + "archive-root": { + "type": "string", + "title": "Archive root", + "description": "Top-level directory inside the archive to treat as the extraction root (mirrors %setup -n); inferred when unset" + }, "file": { "type": "string", "title": "Filename", diff --git a/schemas/azldev.schema.json b/schemas/azldev.schema.json index 7aa38d7e..9aa5a853 100644 --- a/schemas/azldev.schema.json +++ b/schemas/azldev.schema.json @@ -231,6 +231,16 @@ "title": "Description", "description": "Human readable description of overlay" }, + "archive": { + "type": "string", + "title": "Archive", + "description": "The source archive to modify (e.g. pkg-1.0.tar.gz)" + }, + "archive-root": { + "type": "string", + "title": "Archive root", + "description": "Top-level directory inside the archive to treat as the extraction root (mirrors %setup -n); inferred when unset" + }, "file": { "type": "string", "title": "Filename",