diff --git a/storage/drivers/driver.go b/storage/drivers/driver.go index 1ca50b6462..510ee700b2 100644 --- a/storage/drivers/driver.go +++ b/storage/drivers/driver.go @@ -310,6 +310,11 @@ type DifferOptions struct { // UseFsVerity defines whether fs-verity is used UseFsVerity DifferFsVerity + + // StagingDirectory is a writable directory the differ can use for + // temporary scratch data. It must reside on the same filesystem + // as the destination directory. + StagingDirectory string } // Differ defines the interface for using a custom differ. diff --git a/storage/drivers/overlay/overlay.go b/storage/drivers/overlay/overlay.go index 3ade1e7850..cd438e00c2 100644 --- a/storage/drivers/overlay/overlay.go +++ b/storage/drivers/overlay/overlay.go @@ -2242,7 +2242,8 @@ func (d *Driver) ApplyDiffWithDiffer(options *graphdriver.ApplyDiffWithDifferOpt logrus.Debugf("Applying differ in %s", applyDir) differOptions := graphdriver.DifferOptions{ - Format: graphdriver.DifferOutputFormatDir, + Format: graphdriver.DifferOutputFormatDir, + StagingDirectory: layerDir, } if d.usingComposefs { differOptions.Format = graphdriver.DifferOutputFormatFlat diff --git a/storage/pkg/chunked/storage_linux.go b/storage/pkg/chunked/storage_linux.go index ed9a5c7f07..a9f6caecba 100644 --- a/storage/pkg/chunked/storage_linux.go +++ b/storage/pkg/chunked/storage_linux.go @@ -36,6 +36,7 @@ import ( "go.podman.io/storage/pkg/chunked/internal/minimal" path "go.podman.io/storage/pkg/chunked/internal/path" "go.podman.io/storage/pkg/chunked/toc" + "go.podman.io/storage/pkg/fileutils" "go.podman.io/storage/pkg/fsverity" "go.podman.io/storage/pkg/idtools" "go.podman.io/storage/pkg/system" @@ -625,6 +626,40 @@ func collectIDs(entries []fileMetadata) ([]uint32, []uint32) { return mapToSlice(uids), mapToSlice(gids) } +func isReflinkNotSupported(err error) bool { + return errors.Is(err, unix.EOPNOTSUPP) || + errors.Is(err, unix.ENOSYS) || + errors.Is(err, unix.EXDEV) || + errors.Is(err, unix.EINVAL) +} + +func createReflink(srcRoot, srcPath, dstDir string) (string, error) { + parentDirfd, err := unix.Open(srcRoot, unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return "", err + } + defer unix.Close(parentDirfd) + + srcFile, err := openFileUnderRoot(parentDirfd, srcPath, unix.O_RDONLY|unix.O_CLOEXEC, 0) + if err != nil { + return "", err + } + defer srcFile.Close() + + dstFile, err := os.CreateTemp(dstDir, "") + if err != nil { + return "", err + } + defer dstFile.Close() + + if err := fileutils.Reflink(srcFile, dstFile); err != nil { + os.Remove(dstFile.Name()) + return "", err + } + + return filepath.Base(dstFile.Name()), nil +} + type originFile struct { Root string Path string @@ -1777,6 +1812,29 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff wg.Wait() + // Reflink chunk dedup: when findChunkInOtherLayers locates a chunk + // in an existing layer, we cannot use that path directly because + // the source layer may be deleted before storeMissingFiles reads + // from it. Instead we reflink (CoW clone) the source file into a + // scratch directory so the copy is immune to concurrent deletion. + // If the filesystem does not support reflinks we skip chunk dedup + // entirely and fetch everything from the network. + var chunkRefsDir string + if differOpts != nil && differOpts.StagingDirectory != "" { + d, err := os.MkdirTemp(differOpts.StagingDirectory, "chunk-refs-") + if err != nil { + return output, fmt.Errorf("create chunk-refs directory: %w", err) + } + chunkRefsDir = d + defer os.RemoveAll(chunkRefsDir) + } + // reflinkMap caches reflinks: (source root, source path) → reflinked + // filename in chunkRefsDir. Multiple chunks at different offsets + // within the same source file share one reflink. + type reflinkKey struct{ root, path string } + reflinkMap := make(map[reflinkKey]string) + reflinkSupported := chunkRefsDir != "" + for _, res := range copyResults[:filesToWaitFor] { r := &mergedEntries[res.index] @@ -1820,15 +1878,39 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions, diff switch chunk.ChunkType { case minimal.ChunkTypeData: + if !reflinkSupported { + break + } root, path, offset, err := c.layersCache.findChunkInOtherLayers(chunk) if err != nil { return output, err } - if offset >= 0 && validateChunkChecksum(chunk, root, path, offset, c.copyBuffer) { + if offset < 0 { + break + } + key := reflinkKey{root: root, path: path} + refName, ok := reflinkMap[key] + if !ok { + refName, err = createReflink(root, path, chunkRefsDir) + if err != nil { + if isReflinkNotSupported(err) { + reflinkSupported = false + break + } + // ENOENT is expected: the source layer can + // be deleted concurrently. + if errors.Is(err, unix.ENOENT) { + break + } + return output, fmt.Errorf("create reflink for %q: %w", path, err) + } + reflinkMap[key] = refName + } + if validateChunkChecksum(chunk, chunkRefsDir, refName, offset, c.copyBuffer) { missingPartsSize -= size mp.OriginFile = &originFile{ - Root: root, - Path: path, + Root: chunkRefsDir, + Path: refName, Offset: offset, } } diff --git a/storage/pkg/fileutils/reflink_linux.go b/storage/pkg/fileutils/reflink_linux.go index 9f5c6c90bb..f0357da159 100644 --- a/storage/pkg/fileutils/reflink_linux.go +++ b/storage/pkg/fileutils/reflink_linux.go @@ -7,14 +7,19 @@ import ( "golang.org/x/sys/unix" ) +// Reflink attempts to reflink (CoW clone) the source to the destination fd. +// Returns an error if the filesystem does not support reflinks. +func Reflink(src, dst *os.File) error { + return unix.IoctlFileClone(int(dst.Fd()), int(src.Fd())) +} + // ReflinkOrCopy attempts to reflink the source to the destination fd. // If reflinking fails or is unsupported, it falls back to io.Copy(). func ReflinkOrCopy(src, dst *os.File) error { - err := unix.IoctlFileClone(int(dst.Fd()), int(src.Fd())) - if err == nil { + if err := Reflink(src, dst); err == nil { return nil } - _, err = io.Copy(dst, src) + _, err := io.Copy(dst, src) return err } diff --git a/storage/pkg/fileutils/reflink_unsupported.go b/storage/pkg/fileutils/reflink_unsupported.go index c0a30e670c..2da19d74be 100644 --- a/storage/pkg/fileutils/reflink_unsupported.go +++ b/storage/pkg/fileutils/reflink_unsupported.go @@ -3,10 +3,17 @@ package fileutils import ( + "errors" "io" "os" ) +// Reflink attempts to reflink (CoW clone) the source to the destination fd. +// Returns an error if the filesystem does not support reflinks. +func Reflink(src, dst *os.File) error { + return errors.ErrUnsupported +} + // ReflinkOrCopy attempts to reflink the source to the destination fd. // If reflinking fails or is unsupported, it falls back to io.Copy(). func ReflinkOrCopy(src, dst *os.File) error {